In [1]:
"""Implementations of algorithms for continuous control."""
import functools
from jaxrl_m.typing import *

import jax
import jax.numpy as jnp
import numpy as np
import optax
from jaxrl_m.common import TrainState, target_update, nonpytree_field
from jaxrl_m.networks import Policy, Critic, ensemblize

import flax
import flax.linen as nn

class Temperature(nn.Module):
    """Learnable positive scalar, stored as its logarithm.

    The module owns a single scalar parameter named ``log_temp`` that is
    initialised to ``log(initial_temperature)``; calling the module returns
    ``exp(log_temp)``.
    """
    initial_temperature: float = 1.0

    @nn.compact
    def __call__(self) -> jnp.ndarray:
        def _init_log_temp(key):
            # The PRNG key is unused: the initial value is deterministic.
            return jnp.full((), jnp.log(self.initial_temperature))

        log_temp = self.param('log_temp', init_fn=_init_log_temp)
        return jnp.exp(log_temp)

class SACAgent(flax.struct.PyTreeNode):
    """Soft Actor-Critic agent: networks, PRNG state and static hyperparameters.

    Instances are immutable; ``update`` returns a new agent built with
    ``agent.replace(...)`` together with a dict of scalar diagnostics.
    """
    rng: PRNGKey  # split on every `update`; the first split is stored back
    critic: TrainState  # called as critic(obs, actions) and unpacked into (q1, q2)
    target_critic: TrainState  # same call signature; provides the bootstrap values
    actor: TrainState  # called as actor(obs); returns a distribution object
    temp: TrainState  # called with no inputs; returns the entropy temperature
    config: dict = nonpytree_field()  # keys read here: discount, backup_entropy, target_update_rate, target_entropy

    @jax.jit
    def update(agent, batch: Batch):
        """Run one critic, target, actor and temperature step on ``batch``.

        ``batch`` is indexed with the keys ``observations``, ``actions``,
        ``rewards``, ``masks`` and ``next_observations``.

        Returns:
            ``(new_agent, info)`` where ``info`` merges the auxiliary dicts of
            the critic, actor and temperature losses.
        """
        next_rng, policy_key, backup_key = jax.random.split(agent.rng, 3)
        discount = agent.config['discount']

        def critic_loss_fn(critic_params):
            # Build the bootstrapped target from actions sampled at the next state.
            next_policy = agent.actor(batch['next_observations'])
            sampled_next_actions, sampled_next_log_probs = next_policy.sample_and_log_prob(seed=backup_key)

            target_q1, target_q2 = agent.target_critic(batch['next_observations'], sampled_next_actions)
            min_target_q = jnp.minimum(target_q1, target_q2)

            # masks multiplies the bootstrap term, so a zero mask leaves only the reward.
            bootstrap_weight = discount * batch['masks']
            target_q = batch['rewards'] + bootstrap_weight * min_target_q

            if agent.config['backup_entropy']:
                entropy_bonus = bootstrap_weight * sampled_next_log_probs * agent.temp()
                target_q = target_q - entropy_bonus

            # Only this call receives `critic_params`, so only it is differentiated.
            q1, q2 = agent.critic(batch['observations'], batch['actions'], params=critic_params)
            squared_errors = (q1 - target_q)**2 + (q2 - target_q)**2
            critic_loss = squared_errors.mean()

            critic_aux = {
                'critic_loss': critic_loss,
                'q1': q1.mean(),
            }
            return critic_loss, critic_aux

        def actor_loss_fn(actor_params):
            policy = agent.actor(batch['observations'], params=actor_params)
            sampled_actions, sampled_log_probs = policy.sample_and_log_prob(seed=policy_key)

            # The critic is evaluated with its stored (pre-update) parameters.
            pi_q1, pi_q2 = agent.critic(batch['observations'], sampled_actions)
            min_q = jnp.minimum(pi_q1, pi_q2)

            actor_loss = (sampled_log_probs * agent.temp() - min_q).mean()
            actor_aux = {
                'actor_loss': actor_loss,
                'entropy': -1 * sampled_log_probs.mean(),
            }
            return actor_loss, actor_aux

        new_critic, critic_info = agent.critic.apply_loss_fn(loss_fn=critic_loss_fn, has_aux=True)
        # target_update receives the critic as it was before the step above.
        new_target_critic = target_update(agent.critic, agent.target_critic, agent.config['target_update_rate'])
        new_actor, actor_info = agent.actor.apply_loss_fn(loss_fn=actor_loss_fn, has_aux=True)

        # The temperature loss closes over the entropy estimate from the actor step.
        policy_entropy = actor_info['entropy']
        entropy_goal = agent.config['target_entropy']

        def temp_loss_fn(temp_params):
            temperature = agent.temp(params=temp_params)
            temp_loss = (temperature * (policy_entropy - entropy_goal)).mean()
            temp_aux = {
                'temp_loss': temp_loss,
                'temperature': temperature,
            }
            return temp_loss, temp_aux

        new_temp, temp_info = agent.temp.apply_loss_fn(loss_fn=temp_loss_fn, has_aux=True)

        updated_agent = agent.replace(
            rng=next_rng,
            critic=new_critic,
            target_critic=new_target_critic,
            actor=new_actor,
            temp=new_temp,
        )
        merged_info = {**critic_info, **actor_info, **temp_info}
        return updated_agent, merged_info

    @jax.jit
    def sample_actions(agent,
                       observations: np.ndarray,
                       *,
                       seed: PRNGKey,
                       temperature: float = 1.0,
                       ) -> jnp.ndarray:
        """Sample actions from the actor and clip them to ``[-1, 1]``.

        ``temperature`` is forwarded to the actor call and ``seed`` is the
        PRNG key used to draw the sample.
        """
        policy = agent.actor(observations, temperature=temperature)
        sampled = policy.sample(seed=seed)
        return jnp.clip(sampled, -1, 1)



def create_learner(
                 seed: int,
                 observations: jnp.ndarray,
                 actions: jnp.ndarray,
                 actor_lr: float = 3e-4,
                 critic_lr: float = 3e-4,
                 temp_lr: float = 3e-4,
                 hidden_dims: Sequence[int] = (256, 256),
                 discount: float = 0.99,
                 tau: float = 0.005,
                 target_entropy: float = None,
                 backup_entropy: bool = True,
            **kwargs):
    """Build a freshly initialised ``SACAgent``.

    Args:
        seed: integer used to create the root PRNG key.
        observations: example observations used to initialise the networks.
        actions: example actions; the last axis gives the action dimension.
        actor_lr, critic_lr, temp_lr: Adam learning rates for each component.
        hidden_dims: hidden layer sizes passed to the policy and the critic.
        discount: stored in the agent config as ``discount``.
        tau: stored in the agent config as ``target_update_rate``.
        target_entropy: entropy target; ``None`` selects ``-0.5 * action_dim``.
        backup_entropy: stored in the agent config as ``backup_entropy``.
        **kwargs: accepted and printed, but otherwise ignored.

    Returns:
        A ``SACAgent`` holding the actor, critic, target critic and temperature.
    """
    print('Extra kwargs:', kwargs)

    root_key = jax.random.PRNGKey(seed)
    rng, actor_key, critic_key = jax.random.split(root_key, 3)

    action_dim = actions.shape[-1]

    # Static hyperparameters read by SACAgent.update.
    resolved_target_entropy = -0.5 * action_dim if target_entropy is None else target_entropy
    config = flax.core.FrozenDict(dict(
        discount=discount,
        target_update_rate=tau,
        target_entropy=resolved_target_entropy,
        backup_entropy=backup_entropy,
    ))

    # Policy network and its optimiser state.
    actor_def = Policy(
        hidden_dims,
        action_dim=action_dim,
        log_std_min=-10.0,
        state_dependent_std=True,
        tanh_squash_distribution=True,
        final_fc_init_scale=1.0,
    )
    actor_variables = actor_def.init(actor_key, observations)
    actor = TrainState.create(
        actor_def, actor_variables['params'], tx=optax.adam(learning_rate=actor_lr))

    # Two-member critic ensemble; the target starts from the same parameters
    # and is created without an optimiser.
    critic_def = ensemblize(Critic, num_qs=2)(hidden_dims)
    critic_variables = critic_def.init(critic_key, observations, actions)
    critic_params = critic_variables['params']
    critic = TrainState.create(
        critic_def, critic_params, tx=optax.adam(learning_rate=critic_lr))
    target_critic = TrainState.create(critic_def, critic_params)

    # Temperature is initialised with the same key that is stored on the agent.
    temp_def = Temperature()
    temp_variables = temp_def.init(rng)
    temp = TrainState.create(
        temp_def, temp_variables['params'], tx=optax.adam(learning_rate=temp_lr))

    return SACAgent(
        rng,
        critic=critic,
        target_critic=target_critic,
        actor=actor,
        temp=temp,
        config=config,
    )

def get_default_config():
    """Return the default SAC hyperparameters as an ``ml_collections.ConfigDict``.

    ``target_entropy`` is a float placeholder with no value set.
    """
    import ml_collections

    defaults = dict(
        actor_lr=3e-4,
        critic_lr=3e-4,
        temp_lr=3e-4,
        hidden_dims=(256, 256),
        discount=0.99,
        tau=0.005,
        target_entropy=ml_collections.config_dict.placeholder(float),
        backup_entropy=True,
    )
    return ml_collections.ConfigDict(defaults)

In [3]:
import os
from functools import partial
import numpy as np
import jax
import tqdm
import gymnasium as gym

import examples.mujoco.sac as learner

from jaxrl_m.wandb import setup_wandb, default_wandb_config, get_flag_dict
import wandb
from jaxrl_m.evaluation import supply_rng, evaluate, flatten, EpisodeMonitor
from jaxrl_m.dataset import ReplayBuffer

#from ml_collections import config_flags
import pickle
#from flax.training import checkpoints


#FLAGS = flags.FLAGS
# Experiment settings, hard-coded here in place of command-line flags.
env_name = 'Humanoid-v4'
seed = np.random.choice(1000000)  # random integer in [0, 1000000), new on every run
eval_episodes = 10
batch_size = 256
max_steps = 1_000_000
start_steps = 10_000  # steps taken with random actions before updates begin
log_interval = 10000
eval_interval = 10000

# Overrides applied on top of the default wandb configuration.
# NOTE(review): 'name' is a plain string, so '{env_name}' stays literal here;
# presumably it is a template formatted elsewhere - confirm.
# NOTE(review): wandb_config is not passed to setup_wandb in the visible script.
wandb_overrides = {
    'project': 'd4rl_test',
    'group': 'sac_test',
    'name': 'sac_{env_name}',
}
wandb_config = default_wandb_config()
wandb_config.update(wandb_overrides)


# Separate environment instances for data collection and for evaluation.
env = EpisodeMonitor(gym.make(env_name))
eval_env = EpisodeMonitor(gym.make(env_name))

# NOTE(review): the dict passed here is a placeholder, not the run's settings.
setup_wandb({"bonjour":1})

# A dummy transition that fixes the field layout of the replay buffer and
# supplies example inputs for network initialisation.
example_transition = {
    'observations': env.observation_space.sample(),
    'actions': env.action_space.sample(),
    'rewards': 0.0,
    'masks': 1.0,
    'next_observations': env.observation_space.sample(),
}

replay_buffer = ReplayBuffer.create(example_transition, size=int(1e6))

# [None] adds a leading axis of length 1 to the example arrays.
example_obs_batch = example_transition['observations'][None]
example_action_batch = example_transition['actions'][None]
agent = learner.create_learner(
    seed,
    example_obs_batch,
    example_action_batch,
    max_steps=max_steps,
    #**FLAGS.config
)

exploration_metrics = {}
obs, info = env.reset()
exploration_rng = jax.random.PRNGKey(0)

# Main loop: one environment step per iteration, followed by one agent update
# once the replay buffer holds at least start_steps transitions.
progress = tqdm.tqdm(range(1, max_steps + 1), smoothing=0.1, dynamic_ncols=True)
for i in progress:

    # Random actions during warm-up, policy samples afterwards.
    if i >= start_steps:
        exploration_rng, key = jax.random.split(exploration_rng)
        action = agent.sample_actions(obs, seed=key)
    else:
        action = env.action_space.sample()

    # Five-value step API; `done` receives the third value and `truncated` the fourth.
    next_obs, reward, done, truncated, info = env.step(action)

    # mask is 0.0 only when `done` is set and info has no 'TimeLimit.truncated' key.
    keep_bootstrap = (not done) or ('TimeLimit.truncated' in info)
    mask = float(keep_bootstrap)

    transition = {
        'observations': obs,
        'actions': action,
        'rewards': reward,
        'masks': mask,
        'next_observations': next_obs,
    }
    replay_buffer.add_transition(transition)
    obs = next_obs

    # At episode end, keep the final info for logging and start a new episode.
    if done or truncated:
        exploration_metrics = {f'exploration/{k}': v for k, v in flatten(info).items()}
        obs, info = env.reset()

    # No updates, logging or evaluation until enough data has been collected.
    if replay_buffer.size < start_steps:
        continue

    batch = replay_buffer.sample(batch_size)
    agent, update_info = agent.update(batch)

    if i % log_interval == 0:
        train_metrics = {f'training/{k}': v for k, v in update_info.items()}
        wandb.log(train_metrics, step=i)
        wandb.log(exploration_metrics, step=i)
        # Reset so the same episode statistics are not logged twice.
        exploration_metrics = {}

    if i % eval_interval == 0:
        # Evaluation passes temperature=0.0 to sample_actions.
        rng_supplied_policy = supply_rng(agent.sample_actions)
        policy_fn = partial(rng_supplied_policy, temperature=0.0)
        eval_info = evaluate(policy_fn, eval_env, num_episodes=eval_episodes)
        eval_metrics = {f'evaluation/{k}': v for k, v in eval_info.items()}
        wandb.log(eval_metrics, step=i)

    # if i % FLAGS.save_interval == 0 and FLAGS.save_dir is not None:
    #     checkpoints.save_checkpoint(FLAGS.save_dir, agent, i)





0,1
evaluation/episode.duration,█▆▆▄▆▅▆▇▄▇▅▅▆▇▇▅▄▁█▅▇▄▅▃▃▇▆▆▅▅▆▇▇▄▅▆▆▆▄▂
evaluation/episode.length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
evaluation/episode.return,▁▂▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████▇██████
evaluation/final.episode.duration,█▆▆▄▆▅▆▇▄▇▅▅▆▇▇▅▄▁█▅▇▄▅▃▃▇▆▆▅▅▆▇▇▄▅▆▆▆▄▂
evaluation/final.episode.length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
evaluation/final.episode.return,▁▂▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████▇██████
evaluation/final.reward_ctrl,█▂▁▂▂▂▂▃▃▃▃▃▃▃▂▃▂▂▄▃▃▃▂▃▂▃▃▄▄▃▃▃▄▃▄▃▃▃▄▄
evaluation/final.reward_run,▁▁▄▄▅▅▅▅▅▆▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇██████
evaluation/final.total.timesteps,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
evaluation/final.x_position,▁▂▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████▇██████

0,1
evaluation/episode.duration,0.30081
evaluation/episode.length,1000.0
evaluation/episode.return,11648.63946
evaluation/final.episode.duration,0.30081
evaluation/final.episode.length,1000.0
evaluation/final.episode.return,11648.63946
evaluation/final.reward_ctrl,-0.24753
evaluation/final.reward_run,12.8493
evaluation/final.total.timesteps,995500.0
evaluation/final.x_position,596.15315


Extra kwargs: {'max_steps': 1000000}


  logger.warn(
  1%|          | 9991/1000000 [00:01<03:09, 5221.47it/s]

{'evaluation/reward_linvel': 0.04036260457808817, 'evaluation/reward_quadctrl': -1.5370676755205086, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.01969457311990607, 'evaluation/y_position': 0.012533799396730008, 'evaluation/distance_from_origin': 0.028180817394746163, 'evaluation/x_velocity': 0.032290083662470534, 'evaluation/y_velocity': 0.17864813009504163, 'evaluation/forward_reward': 0.04036260457808817, 'evaluation/total.timesteps': 98.0, 'evaluation/episode.return': 68.3142511166228, 'evaluation/episode.length': 19.5, 'evaluation/episode.duration': 0.019856739044189452, 'evaluation/final.reward_linvel': 0.10087588239553355, 'evaluation/final.reward_quadctrl': -1.584960444662704, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.027166398218548438, 'evaluation/final.y_position': 0.05070850314084596, 'evaluation/final.distance_from_origin': 0.058067008749369445, 'evaluation/final.x_velocity': 0.08070070591642683, 'evaluation/final.y_velocity': 0.3

  2%|▏         | 20121/1000000 [00:16<26:24, 618.60it/s] 

{'evaluation/reward_linvel': 0.07711359107285716, 'evaluation/reward_quadctrl': -0.5976331483672356, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.03461992982862208, 'evaluation/y_position': 0.03249191405019019, 'evaluation/distance_from_origin': 0.06314011948903242, 'evaluation/x_velocity': 0.061690872858285735, 'evaluation/y_velocity': -0.03496341010407267, 'evaluation/forward_reward': 0.07711359107285716, 'evaluation/total.timesteps': 496.0, 'evaluation/episode.return': 269.2167746066078, 'evaluation/episode.length': 60.1, 'evaluation/episode.duration': 0.04180684089660645, 'evaluation/final.reward_linvel': 0.2715500239344956, 'evaluation/final.reward_quadctrl': -0.5220051542351538, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.0694832093715193, 'evaluation/final.y_position': -0.030779068207731863, 'evaluation/final.distance_from_origin': 0.12760710200018138, 'evaluation/final.x_velocity': 0.2172400191475965, 'evaluation/final.y_velocity': -0.58

  3%|▎         | 30115/1000000 [00:28<25:28, 634.47it/s]

{'evaluation/reward_linvel': 0.27421854127424017, 'evaluation/reward_quadctrl': -0.7487935764212633, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.06739296048606018, 'evaluation/y_position': -0.056525132916319915, 'evaluation/distance_from_origin': 0.12046042990361792, 'evaluation/x_velocity': 0.21937483301939212, 'evaluation/y_velocity': -0.16736632885857813, 'evaluation/forward_reward': 0.27421854127424017, 'evaluation/total.timesteps': 1122.5, 'evaluation/episode.return': 295.0577077084141, 'evaluation/episode.length': 65.2, 'evaluation/episode.duration': 0.03495035171508789, 'evaluation/final.reward_linvel': 0.812555880998041, 'evaluation/final.reward_quadctrl': -0.9348289116708827, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.229002496004067, 'evaluation/final.y_position': -0.15858733685146048, 'evaluation/final.distance_from_origin': 0.37163743585747133, 'evaluation/final.x_velocity': 0.6500447047984328, 'evaluation/final.y_velocity': -0.566

  4%|▍         | 40124/1000000 [00:41<26:58, 593.23it/s]

{'evaluation/reward_linvel': 0.38176987882813657, 'evaluation/reward_quadctrl': -0.7340024727984116, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.13804740253656697, 'evaluation/y_position': 0.05978425059292532, 'evaluation/distance_from_origin': 0.16442872410397913, 'evaluation/x_velocity': 0.3054159030625093, 'evaluation/y_velocity': 0.164663587566285, 'evaluation/forward_reward': 0.38176987882813657, 'evaluation/total.timesteps': 1838.0, 'evaluation/episode.return': 362.06108092971556, 'evaluation/episode.length': 77.9, 'evaluation/episode.duration': 0.043522071838378903, 'evaluation/final.reward_linvel': 0.9091679137300079, 'evaluation/final.reward_quadctrl': -0.9741142112918141, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.37495941499006474, 'evaluation/final.y_position': 0.19269038654317025, 'evaluation/final.distance_from_origin': 0.4936454749443011, 'evaluation/final.x_velocity': 0.7273343309840065, 'evaluation/final.y_velocity': 0.5398747

  5%|▌         | 50104/1000000 [00:53<27:50, 568.80it/s]

{'evaluation/reward_linvel': 0.3509577387489604, 'evaluation/reward_quadctrl': -0.7231599816772449, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.14265039575376717, 'evaluation/y_position': 0.04430533567800168, 'evaluation/distance_from_origin': 0.17216965014019425, 'evaluation/x_velocity': 0.28076619099916833, 'evaluation/y_velocity': 0.03931452110624653, 'evaluation/forward_reward': 0.3509577387489604, 'evaluation/total.timesteps': 2691.0, 'evaluation/episode.return': 428.99685208054797, 'evaluation/episode.length': 92.7, 'evaluation/episode.duration': 0.05027241706848144, 'evaluation/final.reward_linvel': 0.8254140861038204, 'evaluation/final.reward_quadctrl': -0.9603077990569892, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.40254514518032225, 'evaluation/final.y_position': 0.050312510777930156, 'evaluation/final.distance_from_origin': 0.5423474923056509, 'evaluation/final.x_velocity': 0.6603312688830563, 'evaluation/final.y_velocity': -0.01254

  6%|▌         | 60140/1000000 [01:05<26:14, 597.04it/s]

{'evaluation/reward_linvel': 0.4545176120865133, 'evaluation/reward_quadctrl': -0.7264117380133668, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.17247656818314214, 'evaluation/y_position': 0.09832984547871704, 'evaluation/distance_from_origin': 0.21630679870215583, 'evaluation/x_velocity': 0.3636140896692106, 'evaluation/y_velocity': 0.19536353922582092, 'evaluation/forward_reward': 0.4545176120865133, 'evaluation/total.timesteps': 3576.0, 'evaluation/episode.return': 398.5793251843662, 'evaluation/episode.length': 84.3, 'evaluation/episode.duration': 0.04499275684356689, 'evaluation/final.reward_linvel': 0.9945454144951273, 'evaluation/final.reward_quadctrl': -1.0092365397034178, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.476636957412634, 'evaluation/final.y_position': 0.24202580359566156, 'evaluation/final.distance_from_origin': 0.6207146197132432, 'evaluation/final.x_velocity': 0.7956363315961019, 'evaluation/final.y_velocity': 0.26460568154

  7%|▋         | 70150/1000000 [01:18<25:52, 598.75it/s]

{'evaluation/reward_linvel': 0.43736092613216027, 'evaluation/reward_quadctrl': -0.7640625143332316, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.1827706771057674, 'evaluation/y_position': -0.01723191675432666, 'evaluation/distance_from_origin': 0.19541763950043317, 'evaluation/x_velocity': 0.34988874090572814, 'evaluation/y_velocity': -0.03010003334879778, 'evaluation/forward_reward': 0.43736092613216027, 'evaluation/total.timesteps': 4469.0, 'evaluation/episode.return': 440.69204023263893, 'evaluation/episode.length': 94.3, 'evaluation/episode.duration': 0.0483757495880127, 'evaluation/final.reward_linvel': 0.9918457313642793, 'evaluation/final.reward_quadctrl': -0.9969773712397977, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.5057935340425672, 'evaluation/final.y_position': -0.041573907062013284, 'evaluation/final.distance_from_origin': 0.5534233935965085, 'evaluation/final.x_velocity': 0.7934765850914236, 'evaluation/final.y_velocity': 0.1416

  8%|▊         | 80087/1000000 [01:30<28:10, 544.17it/s]

{'evaluation/reward_linvel': 0.5103651809202703, 'evaluation/reward_quadctrl': -0.741721893732792, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.21537037219629193, 'evaluation/y_position': 0.031748548680808564, 'evaluation/distance_from_origin': 0.23699321382077612, 'evaluation/x_velocity': 0.4082921447362162, 'evaluation/y_velocity': -0.016028755225323817, 'evaluation/forward_reward': 0.5103651809202703, 'evaluation/total.timesteps': 5471.0, 'evaluation/episode.return': 505.9530527705915, 'evaluation/episode.length': 106.1, 'evaluation/episode.duration': 0.05685954093933106, 'evaluation/final.reward_linvel': 1.4848273639959035, 'evaluation/final.reward_quadctrl': -1.0393891781942575, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.6591502387744322, 'evaluation/final.y_position': -0.028524951534184974, 'evaluation/final.distance_from_origin': 0.7006563141347932, 'evaluation/final.x_velocity': 1.187861891196723, 'evaluation/final.y_velocity': -0.45784

  9%|▉         | 90096/1000000 [01:43<30:25, 498.55it/s]

{'evaluation/reward_linvel': 0.5014368180512013, 'evaluation/reward_quadctrl': -0.7822834577296331, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.3002660977396921, 'evaluation/y_position': 0.04667382469238276, 'evaluation/distance_from_origin': 0.3345900012075239, 'evaluation/x_velocity': 0.401149454440961, 'evaluation/y_velocity': 0.04094873783568556, 'evaluation/forward_reward': 0.5014368180512013, 'evaluation/total.timesteps': 6699.0, 'evaluation/episode.return': 658.3218937648587, 'evaluation/episode.length': 139.5, 'evaluation/episode.duration': 0.07405016422271729, 'evaluation/final.reward_linvel': 1.3439213563494106, 'evaluation/final.reward_quadctrl': -1.0107199461469594, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.8519138716992833, 'evaluation/final.y_position': 0.08983454966427998, 'evaluation/final.distance_from_origin': 0.9769911372768176, 'evaluation/final.x_velocity': 1.0751370850795283, 'evaluation/final.y_velocity': 0.055358330656

 10%|█         | 100117/1000000 [01:56<30:06, 498.27it/s]

{'evaluation/reward_linvel': 0.37166846921281754, 'evaluation/reward_quadctrl': -0.7601109954045154, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.2750850551367102, 'evaluation/y_position': -0.007930368567493816, 'evaluation/distance_from_origin': 0.29592223161690917, 'evaluation/x_velocity': 0.297334775370254, 'evaluation/y_velocity': -0.049186947323632164, 'evaluation/forward_reward': 0.37166846921281754, 'evaluation/total.timesteps': 8178.5, 'evaluation/episode.return': 721.2475889036183, 'evaluation/episode.length': 156.4, 'evaluation/episode.duration': 0.08249719142913818, 'evaluation/final.reward_linvel': 0.8796831044812852, 'evaluation/final.reward_quadctrl': -1.0946068557119049, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.7116949963916495, 'evaluation/final.y_position': -0.11802575839080083, 'evaluation/final.distance_from_origin': 0.8239029146846841, 'evaluation/final.x_velocity': 0.703746483585028, 'evaluation/final.y_velocity': -0.3947

 11%|█         | 110086/1000000 [02:09<34:24, 431.11it/s]

{'evaluation/reward_linvel': 0.45135474623592897, 'evaluation/reward_quadctrl': -0.7553767346341869, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.5643016692121176, 'evaluation/y_position': -0.2037239068009917, 'evaluation/distance_from_origin': 0.6074323653698929, 'evaluation/x_velocity': 0.3610837969887431, 'evaluation/y_velocity': -0.11239464818561914, 'evaluation/forward_reward': 0.45135474623592897, 'evaluation/total.timesteps': 9880.5, 'evaluation/episode.return': 864.0599541347204, 'evaluation/episode.length': 184.0, 'evaluation/episode.duration': 0.09990253448486328, 'evaluation/final.reward_linvel': 1.2807833920678864, 'evaluation/final.reward_quadctrl': -0.9007672055476524, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.0092016087796503, 'evaluation/final.y_position': -0.3103609385343036, 'evaluation/final.distance_from_origin': 1.084616369800835, 'evaluation/final.x_velocity': 1.024626713654309, 'evaluation/final.y_velocity': -0.272688646

 12%|█▏        | 120120/1000000 [02:22<34:06, 429.92it/s]

{'evaluation/reward_linvel': 0.27249141905517094, 'evaluation/reward_quadctrl': -0.7581117110578475, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.25665471665199263, 'evaluation/y_position': 0.10698894342406108, 'evaluation/distance_from_origin': 0.3132975961706327, 'evaluation/x_velocity': 0.21799313524413674, 'evaluation/y_velocity': 0.02035915038294337, 'evaluation/forward_reward': 0.27249141905517094, 'evaluation/total.timesteps': 11736.0, 'evaluation/episode.return': 844.6404433662992, 'evaluation/episode.length': 187.1, 'evaluation/episode.duration': 0.10148475170135499, 'evaluation/final.reward_linvel': 0.5181065437608098, 'evaluation/final.reward_quadctrl': -1.1409155556016448, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.6282837565216277, 'evaluation/final.y_position': 0.057283668685677025, 'evaluation/final.distance_from_origin': 0.7772349498299639, 'evaluation/final.x_velocity': 0.4144852350086479, 'evaluation/final.y_velocity': -0.5208

 13%|█▎        | 130149/1000000 [02:36<32:46, 442.39it/s]

{'evaluation/reward_linvel': 0.38217573194402205, 'evaluation/reward_quadctrl': -0.7871004063915741, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.2888002131542668, 'evaluation/y_position': -0.030479402709587357, 'evaluation/distance_from_origin': 0.3086281294356882, 'evaluation/x_velocity': 0.30574058555521766, 'evaluation/y_velocity': 0.024261268305471127, 'evaluation/forward_reward': 0.38217573194402205, 'evaluation/total.timesteps': 13622.0, 'evaluation/episode.return': 873.52381938752, 'evaluation/episode.length': 190.1, 'evaluation/episode.duration': 0.10314750671386719, 'evaluation/final.reward_linvel': 1.3965774603751235, 'evaluation/final.reward_quadctrl': -0.9483211028485966, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.881728432739432, 'evaluation/final.y_position': 0.06531597129899375, 'evaluation/final.distance_from_origin': 0.9772866322199366, 'evaluation/final.x_velocity': 1.1172619683000988, 'evaluation/final.y_velocity': 0.3378135

 14%|█▍        | 140093/1000000 [02:48<27:32, 520.41it/s]

{'evaluation/reward_linvel': 0.29947690121963133, 'evaluation/reward_quadctrl': -0.754113694617327, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.24138586323015246, 'evaluation/y_position': -0.017089468762889665, 'evaluation/distance_from_origin': 0.26436240265473154, 'evaluation/x_velocity': 0.23958152097570504, 'evaluation/y_velocity': 0.025796402024469606, 'evaluation/forward_reward': 0.29947690121963133, 'evaluation/total.timesteps': 15308.0, 'evaluation/episode.return': 668.6229276911988, 'evaluation/episode.length': 147.1, 'evaluation/episode.duration': 0.07656192779541016, 'evaluation/final.reward_linvel': 0.625900607423689, 'evaluation/final.reward_quadctrl': -0.9099670757006143, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.5472734944107702, 'evaluation/final.y_position': 0.058962195549883245, 'evaluation/final.distance_from_origin': 0.683353699556595, 'evaluation/final.x_velocity': 0.5007204859389512, 'evaluation/final.y_velocity': 0.4975

 15%|█▌        | 150081/1000000 [03:01<38:42, 365.88it/s]

{'evaluation/reward_linvel': 0.19159995288079554, 'evaluation/reward_quadctrl': -0.7895084622833209, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.33039439503354034, 'evaluation/y_position': -0.11635190418449097, 'evaluation/distance_from_origin': 0.37833180516701415, 'evaluation/x_velocity': 0.15327996230463645, 'evaluation/y_velocity': -0.05443434164693939, 'evaluation/forward_reward': 0.19159995288079554, 'evaluation/total.timesteps': 17242.5, 'evaluation/episode.return': 1055.6215394452745, 'evaluation/episode.length': 239.8, 'evaluation/episode.duration': 0.1343388319015503, 'evaluation/final.reward_linvel': 0.40851489587818846, 'evaluation/final.reward_quadctrl': -0.9292777962677498, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 0.5651835974650228, 'evaluation/final.y_position': -0.19901761109620844, 'evaluation/final.distance_from_origin': 0.7899427391385486, 'evaluation/final.x_velocity': 0.3268119167025507, 'evaluation/final.y_velocity': -0.

 16%|█▌        | 160104/1000000 [03:16<50:28, 277.32it/s]

{'evaluation/reward_linvel': 0.25000573517815866, 'evaluation/reward_quadctrl': -0.7852113927897684, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.6671561544852154, 'evaluation/y_position': -0.10329996405169169, 'evaluation/distance_from_origin': 0.7157731693844257, 'evaluation/x_velocity': 0.20000458814252692, 'evaluation/y_velocity': -0.024689775117866696, 'evaluation/forward_reward': 0.25000573517815866, 'evaluation/total.timesteps': 20411.0, 'evaluation/episode.return': 1758.6824914667873, 'evaluation/episode.length': 393.9, 'evaluation/episode.duration': 0.2181682825088501, 'evaluation/final.reward_linvel': 0.9955229284081402, 'evaluation/final.reward_quadctrl': -1.0394297933348107, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.1997017796205915, 'evaluation/final.y_position': -0.14799170904803347, 'evaluation/final.distance_from_origin': 1.3024771688755998, 'evaluation/final.x_velocity': 0.7964183427265121, 'evaluation/final.y_velocity': -0.54

 17%|█▋        | 170103/1000000 [03:30<38:28, 359.56it/s]

{'evaluation/reward_linvel': 0.4450051485833878, 'evaluation/reward_quadctrl': -0.7606366999170315, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.5234917239742077, 'evaluation/y_position': 0.07199459021613415, 'evaluation/distance_from_origin': 0.5396024781703753, 'evaluation/x_velocity': 0.3560041188667103, 'evaluation/y_velocity': 0.07983145139240128, 'evaluation/forward_reward': 0.4450051485833878, 'evaluation/total.timesteps': 23618.5, 'evaluation/episode.return': 1159.8496278897896, 'evaluation/episode.length': 247.6, 'evaluation/episode.duration': 0.13679566383361816, 'evaluation/final.reward_linvel': 1.5843658975312729, 'evaluation/final.reward_quadctrl': -0.9948325141119814, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.3319933000636208, 'evaluation/final.y_position': 0.2969944061679695, 'evaluation/final.distance_from_origin': 1.3917138518588357, 'evaluation/final.x_velocity': 1.2674927180250184, 'evaluation/final.y_velocity': 0.3821452074

 18%|█▊        | 180155/1000000 [03:46<1:08:09, 200.47it/s]

{'evaluation/reward_linvel': 0.27600791389190776, 'evaluation/reward_quadctrl': -0.7632334420354167, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.0911874950893614, 'evaluation/y_position': 0.114890445403515, 'evaluation/distance_from_origin': 1.1087463326675886, 'evaluation/x_velocity': 0.2208063311135262, 'evaluation/y_velocity': 0.01663406778875467, 'evaluation/forward_reward': 0.27600791389190776, 'evaluation/total.timesteps': 27933.5, 'evaluation/episode.return': 2777.161409980485, 'evaluation/episode.length': 615.4, 'evaluation/episode.duration': 0.3522321701049805, 'evaluation/final.reward_linvel': 1.0601602726899657, 'evaluation/final.reward_quadctrl': -0.9254210089512679, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 2.0494097794620365, 'evaluation/final.y_position': 0.1530986083365893, 'evaluation/final.distance_from_origin': 2.0943589277160712, 'evaluation/final.x_velocity': 0.8481282181519727, 'evaluation/final.y_velocity': -0.06859299441

 19%|█▉        | 190111/1000000 [04:01<57:01, 236.73it/s]  

{'evaluation/reward_linvel': 0.3333319734318217, 'evaluation/reward_quadctrl': -0.7769652266214814, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.0495227284463755, 'evaluation/y_position': 0.0335802254666462, 'evaluation/distance_from_origin': 1.0639284508495075, 'evaluation/x_velocity': 0.2666655787454574, 'evaluation/y_velocity': 0.03435894523977964, 'evaluation/forward_reward': 0.3333319734318217, 'evaluation/total.timesteps': 33449.0, 'evaluation/episode.return': 2222.140062419404, 'evaluation/episode.length': 487.7, 'evaluation/episode.duration': 0.26656925678253174, 'evaluation/final.reward_linvel': 0.962747044979604, 'evaluation/final.reward_quadctrl': -0.853049524200243, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.967306781737286, 'evaluation/final.y_position': 0.25279094610209224, 'evaluation/final.distance_from_origin': 2.0237058863271518, 'evaluation/final.x_velocity': 0.7701976359836831, 'evaluation/final.y_velocity': 0.50026165182758

 20%|██        | 200085/1000000 [04:15<48:27, 275.14it/s]

{'evaluation/reward_linvel': 0.35886482100567524, 'evaluation/reward_quadctrl': -0.7539383380993182, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.9494939769374696, 'evaluation/y_position': -0.18441862925090072, 'evaluation/distance_from_origin': 0.9843887354093045, 'evaluation/x_velocity': 0.2870918568045402, 'evaluation/y_velocity': -0.022266326603618384, 'evaluation/forward_reward': 0.35886482100567524, 'evaluation/total.timesteps': 37782.0, 'evaluation/episode.return': 1744.8066443732182, 'evaluation/episode.length': 378.9, 'evaluation/episode.duration': 0.21420953273773194, 'evaluation/final.reward_linvel': 1.3208507442993134, 'evaluation/final.reward_quadctrl': -1.0025969838690607, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.648153052131382, 'evaluation/final.y_position': -0.12360627422669765, 'evaluation/final.distance_from_origin': 1.7305530111233138, 'evaluation/final.x_velocity': 1.0566805954394507, 'evaluation/final.y_velocity': 0.1098

 21%|██        | 210129/1000000 [04:29<46:01, 286.06it/s]

{'evaluation/reward_linvel': 0.36456787608941904, 'evaluation/reward_quadctrl': -0.7098994406307054, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.9832328591296189, 'evaluation/y_position': -0.12422473497226695, 'evaluation/distance_from_origin': 0.9998685325671764, 'evaluation/x_velocity': 0.29165430087153515, 'evaluation/y_velocity': -0.09749093315639544, 'evaluation/forward_reward': 0.36456787608941904, 'evaluation/total.timesteps': 41517.0, 'evaluation/episode.return': 1713.3834510923516, 'evaluation/episode.length': 368.1, 'evaluation/episode.duration': 0.20717663764953614, 'evaluation/final.reward_linvel': 0.6700452823210316, 'evaluation/final.reward_quadctrl': -1.0884495613309881, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.62662099357774, 'evaluation/final.y_position': -0.5384718114244021, 'evaluation/final.distance_from_origin': 1.7707554180785336, 'evaluation/final.x_velocity': 0.5360362258568252, 'evaluation/final.y_velocity': -0.79775

 22%|██▏       | 220147/1000000 [04:46<1:14:33, 174.34it/s]

{'evaluation/reward_linvel': 0.18190896533022222, 'evaluation/reward_quadctrl': -0.6927298211130576, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.0359567302792931, 'evaluation/y_position': -0.008811643778229959, 'evaluation/distance_from_origin': 1.0512799746307402, 'evaluation/x_velocity': 0.14552717226417777, 'evaluation/y_velocity': 0.002259035546022722, 'evaluation/forward_reward': 0.18190896533022222, 'evaluation/total.timesteps': 47358.0, 'evaluation/episode.return': 3591.792233288152, 'evaluation/episode.length': 800.1, 'evaluation/episode.duration': 0.43993072509765624, 'evaluation/final.reward_linvel': -0.16148864279138256, 'evaluation/final.reward_quadctrl': -0.7386215946709911, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.7632516422615248, 'evaluation/final.y_position': 0.027842543440138277, 'evaluation/final.distance_from_origin': 1.7886222138201877, 'evaluation/final.x_velocity': -0.12919091423310605, 'evaluation/final.y_velocity': 0

 23%|██▎       | 230145/1000000 [05:02<1:13:18, 175.02it/s]

{'evaluation/reward_linvel': 0.3703166286378348, 'evaluation/reward_quadctrl': -0.7044842546939608, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.7848414345293149, 'evaluation/y_position': -0.21809408976211725, 'evaluation/distance_from_origin': 1.822235810406377, 'evaluation/x_velocity': 0.29625330291026786, 'evaluation/y_velocity': -0.020041001884933907, 'evaluation/forward_reward': 0.3703166286378348, 'evaluation/total.timesteps': 55113.5, 'evaluation/episode.return': 3504.0401128318504, 'evaluation/episode.length': 751.0, 'evaluation/episode.duration': 0.40740361213684084, 'evaluation/final.reward_linvel': 0.8099499690428891, 'evaluation/final.reward_quadctrl': -0.9901641930820917, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.3485242394087984, 'evaluation/final.y_position': -0.2249642934525525, 'evaluation/final.distance_from_origin': 3.552903833157218, 'evaluation/final.x_velocity': 0.6479599752343114, 'evaluation/final.y_velocity': 0.5126944

 24%|██▍       | 240128/1000000 [05:17<51:34, 245.58it/s]  

{'evaluation/reward_linvel': 0.2808427042722918, 'evaluation/reward_quadctrl': -0.7494389100077602, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.9906020707718146, 'evaluation/y_position': -0.20477092192637925, 'evaluation/distance_from_origin': 1.0316682947916602, 'evaluation/x_velocity': 0.22467416341783344, 'evaluation/y_velocity': -0.012063554283537335, 'evaluation/forward_reward': 0.2808427042722918, 'evaluation/total.timesteps': 61109.0, 'evaluation/episode.return': 2030.5220402099362, 'evaluation/episode.length': 448.1, 'evaluation/episode.duration': 0.25200748443603516, 'evaluation/final.reward_linvel': 0.8483465104604193, 'evaluation/final.reward_quadctrl': -0.8676524878750962, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.5281971612330245, 'evaluation/final.y_position': -0.08378812977353126, 'evaluation/final.distance_from_origin': 1.687081599071379, 'evaluation/final.x_velocity': 0.6786772083683352, 'evaluation/final.y_velocity': 0.29593

 25%|██▌       | 250114/1000000 [05:32<50:45, 246.23it/s]

{'evaluation/reward_linvel': 0.43440196863668507, 'evaluation/reward_quadctrl': -0.7733909393848757, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.1663123896365515, 'evaluation/y_position': -0.2844810811265485, 'evaluation/distance_from_origin': 1.2357297206055882, 'evaluation/x_velocity': 0.347521574909348, 'evaluation/y_velocity': -0.08411904596003787, 'evaluation/forward_reward': 0.43440196863668507, 'evaluation/total.timesteps': 65728.5, 'evaluation/episode.return': 2217.7090477180122, 'evaluation/episode.length': 475.8, 'evaluation/episode.duration': 0.27034261226654055, 'evaluation/final.reward_linvel': 1.0772773439624306, 'evaluation/final.reward_quadctrl': -0.9017560249884085, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 2.490534741207047, 'evaluation/final.y_position': -0.5993206059922219, 'evaluation/final.distance_from_origin': 2.611082297353697, 'evaluation/final.x_velocity': 0.8618218751699442, 'evaluation/final.y_velocity': -0.23185466

 26%|██▌       | 260103/1000000 [05:49<1:14:26, 165.66it/s]

{'evaluation/reward_linvel': 0.33937085384433385, 'evaluation/reward_quadctrl': -0.720594486167524, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.621715910044839, 'evaluation/y_position': -0.3952790943970295, 'evaluation/distance_from_origin': 1.6803652465687096, 'evaluation/x_velocity': 0.27149668307546704, 'evaluation/y_velocity': -0.05533112519263873, 'evaluation/forward_reward': 0.33937085384433385, 'evaluation/total.timesteps': 71894.0, 'evaluation/episode.return': 3497.799343241647, 'evaluation/episode.length': 757.3, 'evaluation/episode.duration': 0.43242874145507815, 'evaluation/final.reward_linvel': 0.47053867846729525, 'evaluation/final.reward_quadctrl': -0.7777957708854665, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.100791604975734, 'evaluation/final.y_position': -0.6278607003986995, 'evaluation/final.distance_from_origin': 3.1967306070859367, 'evaluation/final.x_velocity': 0.37643094277383626, 'evaluation/final.y_velocity': 0.2239084

 27%|██▋       | 270149/1000000 [06:06<1:16:17, 159.43it/s]

{'evaluation/reward_linvel': 0.1923815977409467, 'evaluation/reward_quadctrl': -0.696501804609935, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.0416253991874829, 'evaluation/y_position': -0.17900511186831497, 'evaluation/distance_from_origin': 1.0860482163234224, 'evaluation/x_velocity': 0.15390527819275737, 'evaluation/y_velocity': -0.03546022346312267, 'evaluation/forward_reward': 0.1923815977409467, 'evaluation/total.timesteps': 79621.5, 'evaluation/episode.return': 3543.652452945863, 'evaluation/episode.length': 788.2, 'evaluation/episode.duration': 0.4612100839614868, 'evaluation/final.reward_linvel': -0.125106357751571, 'evaluation/final.reward_quadctrl': -0.8148538593667265, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.8285224693864066, 'evaluation/final.y_position': -0.418618004961392, 'evaluation/final.distance_from_origin': 1.9252231930544685, 'evaluation/final.x_velocity': -0.10008508620125682, 'evaluation/final.y_velocity': -0.1396119

 28%|██▊       | 280151/1000000 [06:21<47:37, 251.91it/s]  

{'evaluation/reward_linvel': 0.45382493535824153, 'evaluation/reward_quadctrl': -0.749870610781252, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.4624360590840282, 'evaluation/y_position': -0.16241354963969123, 'evaluation/distance_from_origin': 1.489173076900704, 'evaluation/x_velocity': 0.3630599482865932, 'evaluation/y_velocity': -0.10678687192102088, 'evaluation/forward_reward': 0.45382493535824153, 'evaluation/total.timesteps': 85760.5, 'evaluation/episode.return': 2067.858321084045, 'evaluation/episode.length': 439.6, 'evaluation/episode.duration': 0.24709732532501222, 'evaluation/final.reward_linvel': 1.208990294836095, 'evaluation/final.reward_quadctrl': -0.9227622600491994, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 2.403991529094559, 'evaluation/final.y_position': -0.7033505172681525, 'evaluation/final.distance_from_origin': 2.5422982149328375, 'evaluation/final.x_velocity': 0.967192235868876, 'evaluation/final.y_velocity': -0.7264073063

 29%|██▉       | 290119/1000000 [06:36<48:49, 242.32it/s]

{'evaluation/reward_linvel': 0.4828123578863718, 'evaluation/reward_quadctrl': -0.7558985929928039, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.6396002931803322, 'evaluation/y_position': -0.2867114838298246, 'evaluation/distance_from_origin': 1.6769359540014932, 'evaluation/x_velocity': 0.38624988630909746, 'evaluation/y_velocity': 0.008146468165305362, 'evaluation/forward_reward': 0.4828123578863718, 'evaluation/total.timesteps': 90347.0, 'evaluation/episode.return': 2258.0467054896567, 'evaluation/episode.length': 477.7, 'evaluation/episode.duration': 0.26906619071960447, 'evaluation/final.reward_linvel': 1.0945200890078444, 'evaluation/final.reward_quadctrl': -0.8846795009142026, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 2.782026532624787, 'evaluation/final.y_position': 0.05816812267979057, 'evaluation/final.distance_from_origin': 2.8389015194177283, 'evaluation/final.x_velocity': 0.8756160712062755, 'evaluation/final.y_velocity': 1.00169940

 30%|███       | 300092/1000000 [06:53<1:06:31, 175.33it/s]

{'evaluation/reward_linvel': 0.4175717757064139, 'evaluation/reward_quadctrl': -0.7424706121156264, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.0941942436046173, 'evaluation/y_position': 0.06270220781026968, 'evaluation/distance_from_origin': 2.107925153539989, 'evaluation/x_velocity': 0.3340574205651311, 'evaluation/y_velocity': 0.03228084288134497, 'evaluation/forward_reward': 0.4175717757064139, 'evaluation/total.timesteps': 96167.0, 'evaluation/episode.return': 3208.521928572358, 'evaluation/episode.length': 686.3, 'evaluation/episode.duration': 0.39503648281097414, 'evaluation/final.reward_linvel': 0.2935296457597363, 'evaluation/final.reward_quadctrl': -0.8178046901517954, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.4587167262596177, 'evaluation/final.y_position': 0.33027431802668245, 'evaluation/final.distance_from_origin': 3.5204123862774077, 'evaluation/final.x_velocity': 0.23482371660778897, 'evaluation/final.y_velocity': 0.5652653704

 31%|███       | 310126/1000000 [07:10<1:24:07, 136.66it/s]

{'evaluation/reward_linvel': 0.4368496608563411, 'evaluation/reward_quadctrl': -0.665035563949201, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.537510480437256, 'evaluation/y_position': -0.35590722364604205, 'evaluation/distance_from_origin': 2.5741785111473146, 'evaluation/x_velocity': 0.34947972868507293, 'evaluation/y_velocity': -0.04592066847566216, 'evaluation/forward_reward': 0.4368496608563411, 'evaluation/total.timesteps': 104385.0, 'evaluation/episode.return': 4568.057634969204, 'evaluation/episode.length': 957.3, 'evaluation/episode.duration': 0.541758131980896, 'evaluation/final.reward_linvel': -0.05993398453458443, 'evaluation/final.reward_quadctrl': -0.7020129172151426, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.032467481287694, 'evaluation/final.y_position': -0.664629639786163, 'evaluation/final.distance_from_origin': 5.085115291819933, 'evaluation/final.x_velocity': -0.04794718762766753, 'evaluation/final.y_velocity': -0.01526321

 32%|███▏      | 320115/1000000 [07:28<1:13:23, 154.40it/s]

{'evaluation/reward_linvel': 0.46892800452213845, 'evaluation/reward_quadctrl': -0.7408840019448274, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.2934740335790185, 'evaluation/y_position': -0.26095952099479486, 'evaluation/distance_from_origin': 2.3183030991482094, 'evaluation/x_velocity': 0.3751424036177107, 'evaluation/y_velocity': -0.036237862234133916, 'evaluation/forward_reward': 0.46892800452213845, 'evaluation/total.timesteps': 113280.5, 'evaluation/episode.return': 3885.506561318033, 'evaluation/episode.length': 821.8, 'evaluation/episode.duration': 0.4668283224105835, 'evaluation/final.reward_linvel': 0.7477100592921356, 'evaluation/final.reward_quadctrl': -0.862387013237613, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.640441591920716, 'evaluation/final.y_position': -0.44708596235321585, 'evaluation/final.distance_from_origin': 4.709266364209183, 'evaluation/final.x_velocity': 0.5981680474337085, 'evaluation/final.y_velocity': -0.033465

 33%|███▎      | 330115/1000000 [07:43<42:51, 260.55it/s]  

{'evaluation/reward_linvel': 0.37905574776859696, 'evaluation/reward_quadctrl': -0.7526267683454438, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.1656337399071017, 'evaluation/y_position': -0.2163023964925196, 'evaluation/distance_from_origin': 1.2388673164063917, 'evaluation/x_velocity': 0.3032445982148776, 'evaluation/y_velocity': -0.11991039461094687, 'evaluation/forward_reward': 0.37905574776859696, 'evaluation/total.timesteps': 119400.5, 'evaluation/episode.return': 1860.7497355239925, 'evaluation/episode.length': 402.2, 'evaluation/episode.duration': 0.23097379207611085, 'evaluation/final.reward_linvel': 0.930491934359489, 'evaluation/final.reward_quadctrl': -0.8992967334469126, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.8410339213711677, 'evaluation/final.y_position': -0.7247626103240762, 'evaluation/final.distance_from_origin': 2.2053982459203376, 'evaluation/final.x_velocity': 0.7443935474875911, 'evaluation/final.y_velocity': -0.78280

 34%|███▍      | 340085/1000000 [07:58<53:00, 207.46it/s]

{'evaluation/reward_linvel': 0.4914567537573085, 'evaluation/reward_quadctrl': -0.7443794474789825, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.9540481954358884, 'evaluation/y_position': -0.3459086407917997, 'evaluation/distance_from_origin': 2.000362217606712, 'evaluation/x_velocity': 0.39316540300584674, 'evaluation/y_velocity': -0.09262841398200655, 'evaluation/forward_reward': 0.4914567537573085, 'evaluation/total.timesteps': 124192.5, 'evaluation/episode.return': 2640.3243977520056, 'evaluation/episode.length': 556.2, 'evaluation/episode.duration': 0.3187283515930176, 'evaluation/final.reward_linvel': 1.199478899047285, 'evaluation/final.reward_quadctrl': -0.819264021329784, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.2945023743772097, 'evaluation/final.y_position': -0.7687635340326738, 'evaluation/final.distance_from_origin': 3.418828201073719, 'evaluation/final.x_velocity': 0.9595831192378279, 'evaluation/final.y_velocity': -0.4252771067

 35%|███▌      | 350089/1000000 [08:14<57:58, 186.83it/s]  

{'evaluation/reward_linvel': 0.5835554926807346, 'evaluation/reward_quadctrl': -0.7043505608801611, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.6840447209844305, 'evaluation/y_position': -0.1500971097975266, 'evaluation/distance_from_origin': 2.6974738340839126, 'evaluation/x_velocity': 0.4668443941445876, 'evaluation/y_velocity': -0.05048894775035418, 'evaluation/forward_reward': 0.5835554926807346, 'evaluation/total.timesteps': 130301.5, 'evaluation/episode.return': 3247.5988026064615, 'evaluation/episode.length': 665.6, 'evaluation/episode.duration': 0.38063621520996094, 'evaluation/final.reward_linvel': 0.890845389722217, 'evaluation/final.reward_quadctrl': -0.822018257947882, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.678753053283738, 'evaluation/final.y_position': -0.5032773424645341, 'evaluation/final.distance_from_origin': 4.724556124166247, 'evaluation/final.x_velocity': 0.7126763117777736, 'evaluation/final.y_velocity': -0.3912878912

 36%|███▌      | 360081/1000000 [08:31<1:05:23, 163.12it/s]

{'evaluation/reward_linvel': 0.558746027313327, 'evaluation/reward_quadctrl': -0.7051035949124979, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.761142560088838, 'evaluation/y_position': -0.1912873212110329, 'evaluation/distance_from_origin': 2.775820503292957, 'evaluation/x_velocity': 0.4469968218506616, 'evaluation/y_velocity': -0.05041444750599612, 'evaluation/forward_reward': 0.558746027313327, 'evaluation/total.timesteps': 137576.5, 'evaluation/episode.return': 3831.4653361372134, 'evaluation/episode.length': 789.4, 'evaluation/episode.duration': 0.4349662780761719, 'evaluation/final.reward_linvel': 1.092341614292917, 'evaluation/final.reward_quadctrl': -0.7723638273712089, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.314165562180603, 'evaluation/final.y_position': -0.596775950805402, 'evaluation/final.distance_from_origin': 5.3654170619751165, 'evaluation/final.x_velocity': 0.8738732914343336, 'evaluation/final.y_velocity': -0.27075168119899

 37%|███▋      | 370077/1000000 [08:48<1:01:28, 170.77it/s]

{'evaluation/reward_linvel': 0.4533003871911258, 'evaluation/reward_quadctrl': -0.7043518107934628, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.085970376328659, 'evaluation/y_position': -0.10250212020421996, 'evaluation/distance_from_origin': 2.1029453638046505, 'evaluation/x_velocity': 0.3626403097529006, 'evaluation/y_velocity': -0.03881727761182168, 'evaluation/forward_reward': 0.4533003871911258, 'evaluation/total.timesteps': 145064.5, 'evaluation/episode.return': 3363.205381804825, 'evaluation/episode.length': 708.2, 'evaluation/episode.duration': 0.40180580615997313, 'evaluation/final.reward_linvel': 0.5755214670515134, 'evaluation/final.reward_quadctrl': -0.8785471559992486, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.868503102123647, 'evaluation/final.y_position': -0.4116209528464889, 'evaluation/final.distance_from_origin': 3.9389050851845155, 'evaluation/final.x_velocity': 0.4604171736412107, 'evaluation/final.y_velocity': -0.25007954

 38%|███▊      | 380152/1000000 [09:05<1:04:50, 159.33it/s]

{'evaluation/reward_linvel': 0.5488373596759116, 'evaluation/reward_quadctrl': -0.7046478985611048, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.8896169776552627, 'evaluation/y_position': -1.1052068951920604, 'evaluation/distance_from_origin': 3.1045392879978637, 'evaluation/x_velocity': 0.43906988774072925, 'evaluation/y_velocity': -0.17040454026763238, 'evaluation/forward_reward': 0.5488373596759116, 'evaluation/total.timesteps': 152530.5, 'evaluation/episode.return': 3802.688726975123, 'evaluation/episode.length': 785.0, 'evaluation/episode.duration': 0.44671289920806884, 'evaluation/final.reward_linvel': 0.9264433006089312, 'evaluation/final.reward_quadctrl': -0.7929082277325186, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.186605079237674, 'evaluation/final.y_position': -2.0071311815118027, 'evaluation/final.distance_from_origin': 5.578230484526559, 'evaluation/final.x_velocity': 0.7411546404871451, 'evaluation/final.y_velocity': -0.26938994

 39%|███▉      | 390126/1000000 [09:22<1:08:26, 148.51it/s]

{'evaluation/reward_linvel': 0.4865400574759862, 'evaluation/reward_quadctrl': -0.6951473549261757, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.805799089759889, 'evaluation/y_position': -0.29326388591951613, 'evaluation/distance_from_origin': 2.826460284282578, 'evaluation/x_velocity': 0.389232045980789, 'evaluation/y_velocity': -0.03664766926538846, 'evaluation/forward_reward': 0.4865400574759862, 'evaluation/total.timesteps': 161227.0, 'evaluation/episode.return': 4572.426056043286, 'evaluation/episode.length': 954.3, 'evaluation/episode.duration': 0.5349033832550049, 'evaluation/final.reward_linvel': 0.463822446752434, 'evaluation/final.reward_quadctrl': -0.8108525083901051, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.5873499225359815, 'evaluation/final.y_position': -0.5239388534805987, 'evaluation/final.distance_from_origin': 5.6256092459817815, 'evaluation/final.x_velocity': 0.3710579574019472, 'evaluation/final.y_velocity': 0.012427014618

 40%|████      | 400157/1000000 [09:38<49:33, 201.71it/s]  

{'evaluation/reward_linvel': 0.6663399964121508, 'evaluation/reward_quadctrl': -0.7541480812112302, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.8214526079740345, 'evaluation/y_position': -1.113372872748964, 'evaluation/distance_from_origin': 3.0410788878718735, 'evaluation/x_velocity': 0.5330719971297205, 'evaluation/y_velocity': -0.1956989725246731, 'evaluation/forward_reward': 0.6663399964121508, 'evaluation/total.timesteps': 169012.0, 'evaluation/episode.return': 2960.5780672915944, 'evaluation/episode.length': 602.7, 'evaluation/episode.duration': 0.35288639068603517, 'evaluation/final.reward_linvel': 1.2030435087128997, 'evaluation/final.reward_quadctrl': -0.8633154327053317, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.833206909663194, 'evaluation/final.y_position': -1.772405269318679, 'evaluation/final.distance_from_origin': 5.162570737859283, 'evaluation/final.x_velocity': 0.9624348069703196, 'evaluation/final.y_velocity': -0.13713687853

 41%|████      | 410136/1000000 [09:56<1:13:25, 133.88it/s]

{'evaluation/reward_linvel': 0.4778022835870115, 'evaluation/reward_quadctrl': -0.6704489084964531, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.7647209661257506, 'evaluation/y_position': -0.9633631246409883, 'evaluation/distance_from_origin': 2.935806348073166, 'evaluation/x_velocity': 0.3822418268696092, 'evaluation/y_velocity': -0.12562980268259405, 'evaluation/forward_reward': 0.4778022835870115, 'evaluation/total.timesteps': 177025.5, 'evaluation/episode.return': 4807.353375090558, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.556072187423706, 'evaluation/final.reward_linvel': 0.504184647799429, 'evaluation/final.reward_quadctrl': -0.6974460557344895, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.7533660560871525, 'evaluation/final.y_position': -1.885092795937164, 'evaluation/final.distance_from_origin': 6.068993209610238, 'evaluation/final.x_velocity': 0.40334771823954324, 'evaluation/final.y_velocity': -0.03317447653

 42%|████▏     | 420139/1000000 [10:15<1:14:57, 128.92it/s]

{'evaluation/reward_linvel': 0.4347413451046372, 'evaluation/reward_quadctrl': -0.6975499849872606, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.5210440984403597, 'evaluation/y_position': -0.9293189558355007, 'evaluation/distance_from_origin': 2.6947007206511837, 'evaluation/x_velocity': 0.34779307608370974, 'evaluation/y_velocity': -0.13063440613547614, 'evaluation/forward_reward': 0.4347413451046372, 'evaluation/total.timesteps': 187025.5, 'evaluation/episode.return': 4737.191360117377, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5757420778274536, 'evaluation/final.reward_linvel': 0.4617686928065338, 'evaluation/final.reward_quadctrl': -0.6889903288030231, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.231645968577526, 'evaluation/final.y_position': -1.962313665725781, 'evaluation/final.distance_from_origin': 5.596583742338312, 'evaluation/final.x_velocity': 0.3694149542452271, 'evaluation/final.y_velocity': -0.135252053

 43%|████▎     | 430138/1000000 [10:32<1:01:30, 154.40it/s]

{'evaluation/reward_linvel': 0.5745187253739804, 'evaluation/reward_quadctrl': -0.6594361146457143, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.9804043403241516, 'evaluation/y_position': -0.49085741327178467, 'evaluation/distance_from_origin': 3.0310080644230477, 'evaluation/x_velocity': 0.45961498029918435, 'evaluation/y_velocity': -0.08531781158049195, 'evaluation/forward_reward': 0.5745187253739804, 'evaluation/total.timesteps': 196523.5, 'evaluation/episode.return': 4421.608316611145, 'evaluation/episode.length': 899.6, 'evaluation/episode.duration': 0.5011675119400024, 'evaluation/final.reward_linvel': 0.9187304459322144, 'evaluation/final.reward_quadctrl': -0.7730899116552763, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.2145410911654535, 'evaluation/final.y_position': -1.1552000927703248, 'evaluation/final.distance_from_origin': 6.335929530502582, 'evaluation/final.x_velocity': 0.7349843567457714, 'evaluation/final.y_velocity': -0.4085245

 44%|████▍     | 440142/1000000 [10:50<1:12:10, 129.29it/s]

{'evaluation/reward_linvel': 0.4848754898073344, 'evaluation/reward_quadctrl': -0.6986212993769226, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.8320181705126912, 'evaluation/y_position': -0.8591726395165518, 'evaluation/distance_from_origin': 2.966302498513527, 'evaluation/x_velocity': 0.38790039184586755, 'evaluation/y_velocity': -0.11232583875795032, 'evaluation/forward_reward': 0.4848754898073344, 'evaluation/total.timesteps': 205930.0, 'evaluation/episode.return': 4698.665738745536, 'evaluation/episode.length': 981.7, 'evaluation/episode.duration': 0.5690365314483643, 'evaluation/final.reward_linvel': 0.366313307922872, 'evaluation/final.reward_quadctrl': -0.6378147825363085, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.728552125305283, 'evaluation/final.y_position': -1.6535998765518094, 'evaluation/final.distance_from_origin': 5.971481449228812, 'evaluation/final.x_velocity': 0.2930506463382976, 'evaluation/final.y_velocity': 0.123337370720

 45%|████▌     | 450140/1000000 [11:09<1:06:57, 136.86it/s]

{'evaluation/reward_linvel': 0.6077827860705057, 'evaluation/reward_quadctrl': -0.6601761881698525, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.4520076943830857, 'evaluation/y_position': -0.7189888562544287, 'evaluation/distance_from_origin': 3.532456055607814, 'evaluation/x_velocity': 0.48622622885640443, 'evaluation/y_velocity': -0.11156942687323525, 'evaluation/forward_reward': 0.6077827860705057, 'evaluation/total.timesteps': 215802.0, 'evaluation/episode.return': 4911.489069735979, 'evaluation/episode.length': 992.7, 'evaluation/episode.duration': 0.5588824272155761, 'evaluation/final.reward_linvel': 0.743239797763103, 'evaluation/final.reward_quadctrl': -0.6165163684738458, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.248137861211731, 'evaluation/final.y_position': -1.6582434013432163, 'evaluation/final.distance_from_origin': 7.451767096505352, 'evaluation/final.x_velocity': 0.5945918382104824, 'evaluation/final.y_velocity': -0.15465379750

 46%|████▌     | 460089/1000000 [11:25<53:53, 166.98it/s]  

{'evaluation/reward_linvel': 0.43950126689866825, 'evaluation/reward_quadctrl': -0.6959604879611938, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.3372158589963767, 'evaluation/y_position': -0.0069041584186680695, 'evaluation/distance_from_origin': 2.3538245120421295, 'evaluation/x_velocity': 0.3516010135189346, 'evaluation/y_velocity': -0.018031234345472748, 'evaluation/forward_reward': 0.43950126689866825, 'evaluation/total.timesteps': 224380.0, 'evaluation/episode.return': 3429.1056290939, 'evaluation/episode.length': 722.9, 'evaluation/episode.duration': 0.41440207958221437, 'evaluation/final.reward_linvel': 0.7295474773896296, 'evaluation/final.reward_quadctrl': -0.8237882953129965, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.826518067007175, 'evaluation/final.y_position': -0.19818716482173712, 'evaluation/final.distance_from_origin': 3.850051339355118, 'evaluation/final.x_velocity': 0.5836379819117037, 'evaluation/final.y_velocity': -0.2027

 47%|████▋     | 470114/1000000 [11:43<1:05:05, 135.67it/s]

{'evaluation/reward_linvel': 0.48848627450193893, 'evaluation/reward_quadctrl': -0.6615639158009172, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.625434748330304, 'evaluation/y_position': -0.8542161352455769, 'evaluation/distance_from_origin': 2.7820226047767793, 'evaluation/x_velocity': 0.3907890196015511, 'evaluation/y_velocity': -0.13682328427256416, 'evaluation/forward_reward': 0.48848627450193893, 'evaluation/total.timesteps': 232716.5, 'evaluation/episode.return': 4558.545475557245, 'evaluation/episode.length': 944.4, 'evaluation/episode.duration': 0.5396087169647217, 'evaluation/final.reward_linvel': 0.4789537860401779, 'evaluation/final.reward_quadctrl': -0.7705450732501332, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.5519401964271395, 'evaluation/final.y_position': -1.9371411298695727, 'evaluation/final.distance_from_origin': 5.909601554604906, 'evaluation/final.x_velocity': 0.3831630288321423, 'evaluation/final.y_velocity': -0.29924983

 48%|████▊     | 480133/1000000 [12:02<1:04:06, 135.17it/s]

{'evaluation/reward_linvel': 0.5260664225347457, 'evaluation/reward_quadctrl': -0.6540866489652305, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.772732814685747, 'evaluation/y_position': -0.9491418679851351, 'evaluation/distance_from_origin': 2.957931356131216, 'evaluation/x_velocity': 0.42085313802779645, 'evaluation/y_velocity': -0.13660944225028, 'evaluation/forward_reward': 0.5260664225347457, 'evaluation/total.timesteps': 242309.5, 'evaluation/episode.return': 4746.282695411422, 'evaluation/episode.length': 974.2, 'evaluation/episode.duration': 0.5436959505081177, 'evaluation/final.reward_linvel': 0.7736514520533626, 'evaluation/final.reward_quadctrl': -0.7036360065590517, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.164229260932722, 'evaluation/final.y_position': -1.9966754882571442, 'evaluation/final.distance_from_origin': 6.502751934650936, 'evaluation/final.x_velocity': 0.61892116164269, 'evaluation/final.y_velocity': -0.2720859903224726

 49%|████▉     | 490089/1000000 [12:19<55:43, 152.49it/s]  

{'evaluation/reward_linvel': 0.6473524551027995, 'evaluation/reward_quadctrl': -0.7192132946510127, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.328981858940895, 'evaluation/y_position': -0.6606267976997997, 'evaluation/distance_from_origin': 3.413731098991258, 'evaluation/x_velocity': 0.5178819640822395, 'evaluation/y_velocity': -0.08968154983908917, 'evaluation/forward_reward': 0.6473524551027995, 'evaluation/total.timesteps': 251317.5, 'evaluation/episode.return': 4077.54234135781, 'evaluation/episode.length': 827.4, 'evaluation/episode.duration': 0.4685636520385742, 'evaluation/final.reward_linvel': 0.9608182282603943, 'evaluation/final.reward_quadctrl': -0.8131987887787486, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.444259461285545, 'evaluation/final.y_position': -1.1122909031628274, 'evaluation/final.distance_from_origin': 6.634295648194398, 'evaluation/final.x_velocity': 0.7686545826083154, 'evaluation/final.y_velocity': -0.0665958977576

 50%|█████     | 500141/1000000 [12:36<55:38, 149.71it/s]  

{'evaluation/reward_linvel': 0.6357257908639569, 'evaluation/reward_quadctrl': -0.6599923801992015, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.3085140038841043, 'evaluation/y_position': -1.3882946776834184, 'evaluation/distance_from_origin': 3.596645164779973, 'evaluation/x_velocity': 0.5085806326911655, 'evaluation/y_velocity': -0.2021472317194351, 'evaluation/forward_reward': 0.6357257908639569, 'evaluation/total.timesteps': 259674.0, 'evaluation/episode.return': 4199.021425259989, 'evaluation/episode.length': 843.9, 'evaluation/episode.duration': 0.48261270523071287, 'evaluation/final.reward_linvel': 0.9526429785036983, 'evaluation/final.reward_quadctrl': -0.6981016299091032, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.4512253872515455, 'evaluation/final.y_position': -2.5588074014893016, 'evaluation/final.distance_from_origin': 6.954284611995561, 'evaluation/final.x_velocity': 0.7621143828029587, 'evaluation/final.y_velocity': -0.2215251319

 51%|█████     | 510120/1000000 [12:54<56:31, 144.44it/s]  

{'evaluation/reward_linvel': 0.4482779876092868, 'evaluation/reward_quadctrl': -0.6791153747666204, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.4181306871925985, 'evaluation/y_position': -0.3352370533560305, 'evaluation/distance_from_origin': 2.4761348454634056, 'evaluation/x_velocity': 0.35862239008742935, 'evaluation/y_velocity': -0.05847356999576427, 'evaluation/forward_reward': 0.4482779876092868, 'evaluation/total.timesteps': 268458.5, 'evaluation/episode.return': 4354.245465525354, 'evaluation/episode.length': 913.0, 'evaluation/episode.duration': 0.517697024345398, 'evaluation/final.reward_linvel': 0.2996727276390254, 'evaluation/final.reward_quadctrl': -0.753095614078006, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.9250935266673865, 'evaluation/final.y_position': -0.8060363700219039, 'evaluation/final.distance_from_origin': 5.08893520954636, 'evaluation/final.x_velocity': 0.23973818211122028, 'evaluation/final.y_velocity': 0.02816806372

 52%|█████▏    | 520144/1000000 [13:13<1:00:54, 131.32it/s]

{'evaluation/reward_linvel': 0.5294216247026263, 'evaluation/reward_quadctrl': -0.6578248622391937, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.832295876811416, 'evaluation/y_position': -0.800810919615322, 'evaluation/distance_from_origin': 2.9706774354308205, 'evaluation/x_velocity': 0.4235372997621011, 'evaluation/y_velocity': -0.11872613204691415, 'evaluation/forward_reward': 0.5294216247026263, 'evaluation/total.timesteps': 278023.5, 'evaluation/episode.return': 4871.596762463433, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.574305534362793, 'evaluation/final.reward_linvel': 0.6307611633375418, 'evaluation/final.reward_quadctrl': -0.7335876391719398, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.367715142253061, 'evaluation/final.y_position': -1.780793864943798, 'evaluation/final.distance_from_origin': 6.634372780111365, 'evaluation/final.x_velocity': 0.5046089306700334, 'evaluation/final.y_velocity': -0.0768401253258

 53%|█████▎    | 530092/1000000 [13:30<50:21, 155.51it/s]  

{'evaluation/reward_linvel': 0.4836734142897546, 'evaluation/reward_quadctrl': -0.6860584960222457, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.72474760246671, 'evaluation/y_position': -0.5786200128579061, 'evaluation/distance_from_origin': 2.8015235935024756, 'evaluation/x_velocity': 0.38693873143180363, 'evaluation/y_velocity': -0.08122776852329516, 'evaluation/forward_reward': 0.4836734142897546, 'evaluation/total.timesteps': 287510.0, 'evaluation/episode.return': 4304.899866161435, 'evaluation/episode.length': 897.3, 'evaluation/episode.duration': 0.4867504596710205, 'evaluation/final.reward_linvel': 0.40493019964758864, 'evaluation/final.reward_quadctrl': -0.7597004339338003, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.221253417114127, 'evaluation/final.y_position': -1.0932428172807838, 'evaluation/final.distance_from_origin': 5.401385994270156, 'evaluation/final.x_velocity': 0.3239441597180709, 'evaluation/final.y_velocity': -0.1528127710

 54%|█████▍    | 540100/1000000 [13:48<54:59, 139.38it/s]

{'evaluation/reward_linvel': 0.4191202678139883, 'evaluation/reward_quadctrl': -0.7313908115795843, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.2554446593186626, 'evaluation/y_position': -0.134458746594946, 'evaluation/distance_from_origin': 2.2837371380958253, 'evaluation/x_velocity': 0.33529621425119066, 'evaluation/y_velocity': -0.014492369846101593, 'evaluation/forward_reward': 0.4191202678139883, 'evaluation/total.timesteps': 296579.0, 'evaluation/episode.return': 4296.304046638828, 'evaluation/episode.length': 916.5, 'evaluation/episode.duration': 0.526543402671814, 'evaluation/final.reward_linvel': 0.3983888332559934, 'evaluation/final.reward_quadctrl': -0.7833605784620208, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.6248329193542235, 'evaluation/final.y_position': -0.20296343197866248, 'evaluation/final.distance_from_origin': 4.655418126877488, 'evaluation/final.x_velocity': 0.3187110666047947, 'evaluation/final.y_velocity': -0.01338533

 55%|█████▌    | 550095/1000000 [14:05<51:17, 146.21it/s]

{'evaluation/reward_linvel': 0.5966465906231845, 'evaluation/reward_quadctrl': -0.67377687202038, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.295792447975175, 'evaluation/y_position': -0.6469213804485237, 'evaluation/distance_from_origin': 3.3765395302501795, 'evaluation/x_velocity': 0.47731727249854766, 'evaluation/y_velocity': -0.09009421995984566, 'evaluation/forward_reward': 0.5966465906231845, 'evaluation/total.timesteps': 305992.5, 'evaluation/episode.return': 4756.476722114032, 'evaluation/episode.length': 966.2, 'evaluation/episode.duration': 0.5350919961929321, 'evaluation/final.reward_linvel': 0.7523488090885435, 'evaluation/final.reward_quadctrl': -0.727111797819085, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.931008984366544, 'evaluation/final.y_position': -1.303926845738841, 'evaluation/final.distance_from_origin': 7.069952808161679, 'evaluation/final.x_velocity': 0.6018790472708347, 'evaluation/final.y_velocity': -0.15463688327237

 56%|█████▌    | 560131/1000000 [14:23<50:40, 144.68it/s]

{'evaluation/reward_linvel': 0.4623728613172471, 'evaluation/reward_quadctrl': -0.6815681198125942, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.792855113569036, 'evaluation/y_position': -0.7966665081410353, 'evaluation/distance_from_origin': 2.9257178435237363, 'evaluation/x_velocity': 0.3698982890537976, 'evaluation/y_velocity': -0.10729656848317752, 'evaluation/forward_reward': 0.4623728613172471, 'evaluation/total.timesteps': 315367.0, 'evaluation/episode.return': 4344.317268605277, 'evaluation/episode.length': 908.7, 'evaluation/episode.duration': 0.5138872146606446, 'evaluation/final.reward_linvel': 0.44177319880039007, 'evaluation/final.reward_quadctrl': -0.7309655965482115, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.053633959484909, 'evaluation/final.y_position': -1.4596821776398181, 'evaluation/final.distance_from_origin': 5.283060237484338, 'evaluation/final.x_velocity': 0.353418559040312, 'evaluation/final.y_velocity': -0.07220207287

 57%|█████▋    | 570077/1000000 [14:41<50:24, 142.16it/s]

{'evaluation/reward_linvel': 0.6505000376152957, 'evaluation/reward_quadctrl': -0.736133907270356, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.5578639627032587, 'evaluation/y_position': -0.7816677740476329, 'evaluation/distance_from_origin': 3.656048029675949, 'evaluation/x_velocity': 0.5204000300922366, 'evaluation/y_velocity': -0.10887379728940706, 'evaluation/forward_reward': 0.6505000376152957, 'evaluation/total.timesteps': 324568.0, 'evaluation/episode.return': 4577.732050416313, 'evaluation/episode.length': 931.5, 'evaluation/episode.duration': 0.5083434104919433, 'evaluation/final.reward_linvel': 0.9191690136293398, 'evaluation/final.reward_quadctrl': -0.7187681374052733, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.285432341925608, 'evaluation/final.y_position': -1.5195627731664367, 'evaluation/final.distance_from_origin': 7.455289060474046, 'evaluation/final.x_velocity': 0.7353352109034718, 'evaluation/final.y_velocity': -0.240015810919

 58%|█████▊    | 580141/1000000 [14:59<50:24, 138.82it/s]

{'evaluation/reward_linvel': 0.42891725139787723, 'evaluation/reward_quadctrl': -0.6625999391158237, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.494344698746395, 'evaluation/y_position': -0.09996032510192947, 'evaluation/distance_from_origin': 2.5149089130666993, 'evaluation/x_velocity': 0.3431338011183017, 'evaluation/y_velocity': -0.013402486733202648, 'evaluation/forward_reward': 0.42891725139787723, 'evaluation/total.timesteps': 334225.5, 'evaluation/episode.return': 4766.3173122820535, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.549703049659729, 'evaluation/final.reward_linvel': 0.4546168225718111, 'evaluation/final.reward_quadctrl': -0.6484412245275071, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.161762711512293, 'evaluation/final.y_position': -0.20401513773815722, 'evaluation/final.distance_from_origin': 5.183949610956253, 'evaluation/final.x_velocity': 0.3636934580574488, 'evaluation/final.y_velocity': 0.079539

 59%|█████▉    | 590136/1000000 [15:17<53:34, 127.49it/s]

{'evaluation/reward_linvel': 0.11911538588328635, 'evaluation/reward_quadctrl': -0.7159502341865841, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 0.6222791484412985, 'evaluation/y_position': 0.120724894467291, 'evaluation/distance_from_origin': 0.7088110974485, 'evaluation/x_velocity': 0.09529230870662907, 'evaluation/y_velocity': 0.00880766963029947, 'evaluation/forward_reward': 0.11911538588328635, 'evaluation/total.timesteps': 343919.5, 'evaluation/episode.return': 4133.691444412865, 'evaluation/episode.length': 938.8, 'evaluation/episode.duration': 0.571379566192627, 'evaluation/final.reward_linvel': 0.20222082656241244, 'evaluation/final.reward_quadctrl': -0.7958070380811838, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.35956283381998, 'evaluation/final.y_position': 0.12304294052520874, 'evaluation/final.distance_from_origin': 1.6535978384023777, 'evaluation/final.x_velocity': 0.16177666124992998, 'evaluation/final.y_velocity': -0.063717739835

 60%|██████    | 600151/1000000 [15:35<46:34, 143.08it/s]

{'evaluation/reward_linvel': 0.518797129814331, 'evaluation/reward_quadctrl': -0.6483821683228556, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.8523449814194923, 'evaluation/y_position': -0.6921636304867368, 'evaluation/distance_from_origin': 2.9641095314971726, 'evaluation/x_velocity': 0.4150377038514649, 'evaluation/y_velocity': -0.09977142522391148, 'evaluation/forward_reward': 0.518797129814331, 'evaluation/total.timesteps': 353077.5, 'evaluation/episode.return': 4348.306477619588, 'evaluation/episode.length': 892.8, 'evaluation/episode.duration': 0.5173444747924805, 'evaluation/final.reward_linvel': 0.7687550672391538, 'evaluation/final.reward_quadctrl': -0.7753097310076139, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.5763432943718865, 'evaluation/final.y_position': -1.3361596304503673, 'evaluation/final.distance_from_origin': 5.774093380280293, 'evaluation/final.x_velocity': 0.6150040537913231, 'evaluation/final.y_velocity': -0.16563848270

 61%|██████    | 610129/1000000 [15:51<31:45, 204.63it/s]

{'evaluation/reward_linvel': 0.35649231238098233, 'evaluation/reward_quadctrl': -0.7485090602341566, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.2819635652798438, 'evaluation/y_position': -0.18575511303698067, 'evaluation/distance_from_origin': 1.4046721439674934, 'evaluation/x_velocity': 0.2851938499047859, 'evaluation/y_velocity': -0.09154734516787429, 'evaluation/forward_reward': 0.35649231238098233, 'evaluation/total.timesteps': 360426.5, 'evaluation/episode.return': 2658.8063364887166, 'evaluation/episode.length': 577.0, 'evaluation/episode.duration': 0.3284873962402344, 'evaluation/final.reward_linvel': 0.6321392347160162, 'evaluation/final.reward_quadctrl': -0.9276982757251785, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 2.4852868587870653, 'evaluation/final.y_position': -0.79132403967094, 'evaluation/final.distance_from_origin': 2.754403391504108, 'evaluation/final.x_velocity': 0.505711387772813, 'evaluation/final.y_velocity': -0.44394901

 62%|██████▏   | 620121/1000000 [16:09<51:02, 124.04it/s]

{'evaluation/reward_linvel': 0.225714538719701, 'evaluation/reward_quadctrl': -0.6405318036739036, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.5390959661414572, 'evaluation/y_position': -0.07496499159999333, 'evaluation/distance_from_origin': 1.5713826880202062, 'evaluation/x_velocity': 0.18057163097576082, 'evaluation/y_velocity': -0.009955693478495675, 'evaluation/forward_reward': 0.225714538719701, 'evaluation/total.timesteps': 368311.5, 'evaluation/episode.return': 4585.182735045796, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.6062007427215577, 'evaluation/final.reward_linvel': 0.15912568920204245, 'evaluation/final.reward_quadctrl': -0.5958173466769188, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 2.7256829693329574, 'evaluation/final.y_position': -0.15283100843183517, 'evaluation/final.distance_from_origin': 2.7512964111277785, 'evaluation/final.x_velocity': 0.12730055136163396, 'evaluation/final.y_velocity': 0.0188

 63%|██████▎   | 630124/1000000 [16:28<46:01, 133.96it/s]

{'evaluation/reward_linvel': 0.5397038997564998, 'evaluation/reward_quadctrl': -0.6304533070437887, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.058676929423646, 'evaluation/y_position': -0.6299425387321612, 'evaluation/distance_from_origin': 3.146329381258549, 'evaluation/x_velocity': 0.43176311980519994, 'evaluation/y_velocity': -0.09822539042179493, 'evaluation/forward_reward': 0.5397038997564998, 'evaluation/total.timesteps': 378167.0, 'evaluation/episode.return': 4767.373250583313, 'evaluation/episode.length': 971.1, 'evaluation/episode.duration': 0.5589440584182739, 'evaluation/final.reward_linvel': 0.6494251518427714, 'evaluation/final.reward_quadctrl': -0.6616270219381795, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.300282986092128, 'evaluation/final.y_position': -1.4348670597391973, 'evaluation/final.distance_from_origin': 6.493177081046555, 'evaluation/final.x_velocity': 0.5195401214742172, 'evaluation/final.y_velocity': -0.24332214030

 64%|██████▍   | 640129/1000000 [16:45<39:45, 150.86it/s]

{'evaluation/reward_linvel': 0.1684162758096296, 'evaluation/reward_quadctrl': -0.6853790110145671, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.198287900699432, 'evaluation/y_position': -0.19451137744901673, 'evaluation/distance_from_origin': 1.2336718476685948, 'evaluation/x_velocity': 0.1347330206477037, 'evaluation/y_velocity': -0.01669661576263729, 'evaluation/forward_reward': 0.1684162758096296, 'evaluation/total.timesteps': 387185.5, 'evaluation/episode.return': 3732.5768266683685, 'evaluation/episode.length': 832.6, 'evaluation/episode.duration': 0.4872859001159668, 'evaluation/final.reward_linvel': -0.18436167914171298, 'evaluation/final.reward_quadctrl': -0.8390480239530762, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 1.6952178313203348, 'evaluation/final.y_position': -0.20608303487870225, 'evaluation/final.distance_from_origin': 1.9107977216724887, 'evaluation/final.x_velocity': -0.14748934331337038, 'evaluation/final.y_velocity': 0.025

 65%|██████▌   | 650078/1000000 [17:03<46:50, 124.48it/s]

{'evaluation/reward_linvel': 0.5479916347533696, 'evaluation/reward_quadctrl': -0.6579855488061643, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.9109217723273, 'evaluation/y_position': -0.7645213603509623, 'evaluation/distance_from_origin': 3.0364245461745827, 'evaluation/x_velocity': 0.43839330780269564, 'evaluation/y_velocity': -0.11743716302218511, 'evaluation/forward_reward': 0.5479916347533696, 'evaluation/total.timesteps': 396008.0, 'evaluation/episode.return': 4556.996671494197, 'evaluation/episode.length': 931.9, 'evaluation/episode.duration': 0.5419763088226318, 'evaluation/final.reward_linvel': 0.7037975345154158, 'evaluation/final.reward_quadctrl': -0.8057370158497982, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.142145059127068, 'evaluation/final.y_position': -1.6429141633796047, 'evaluation/final.distance_from_origin': 6.374234513056516, 'evaluation/final.x_velocity': 0.5630380276123326, 'evaluation/final.y_velocity': -0.014622884394

 66%|██████▌   | 660100/1000000 [17:22<44:30, 127.27it/s]

{'evaluation/reward_linvel': 0.45880222007004146, 'evaluation/reward_quadctrl': -0.6962192677171632, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.5317646096468436, 'evaluation/y_position': -0.5778534149607942, 'evaluation/distance_from_origin': 2.604825752573709, 'evaluation/x_velocity': 0.3670417760560332, 'evaluation/y_velocity': -0.07572611162113625, 'evaluation/forward_reward': 0.45880222007004146, 'evaluation/total.timesteps': 405622.5, 'evaluation/episode.return': 4719.719705781704, 'evaluation/episode.length': 991.0, 'evaluation/episode.duration': 0.5853536367416382, 'evaluation/final.reward_linvel': 0.5953566377355706, 'evaluation/final.reward_quadctrl': -0.6787672733490725, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.475437015627077, 'evaluation/final.y_position': -1.1285431199094762, 'evaluation/final.distance_from_origin': 5.600331432326885, 'evaluation/final.x_velocity': 0.47628531018845643, 'evaluation/final.y_velocity': 0.004006625

 67%|██████▋   | 670145/1000000 [17:40<39:42, 138.44it/s]

{'evaluation/reward_linvel': 0.4151491331565746, 'evaluation/reward_quadctrl': -0.6397429965855596, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.3966175619686663, 'evaluation/y_position': -0.10592209154546538, 'evaluation/distance_from_origin': 2.418474521493818, 'evaluation/x_velocity': 0.33211930652525967, 'evaluation/y_velocity': -0.0016454549527727378, 'evaluation/forward_reward': 0.4151491331565746, 'evaluation/total.timesteps': 415268.0, 'evaluation/episode.return': 4479.80849671727, 'evaluation/episode.length': 938.1, 'evaluation/episode.duration': 0.5316959857940674, 'evaluation/final.reward_linvel': 0.548409753067706, 'evaluation/final.reward_quadctrl': -0.6851574368134206, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.687295356711845, 'evaluation/final.y_position': -0.02213885903169574, 'evaluation/final.distance_from_origin': 4.741126335316019, 'evaluation/final.x_velocity': 0.4387278024541649, 'evaluation/final.y_velocity': 0.115316550

 68%|██████▊   | 680137/1000000 [17:58<41:21, 128.88it/s]

{'evaluation/reward_linvel': 0.525668349880023, 'evaluation/reward_quadctrl': -0.7279945344127204, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.003581525619336, 'evaluation/y_position': -0.2241556928391864, 'evaluation/distance_from_origin': 3.0284648020992995, 'evaluation/x_velocity': 0.42053467990401844, 'evaluation/y_velocity': -0.03943943542379223, 'evaluation/forward_reward': 0.525668349880023, 'evaluation/total.timesteps': 424958.5, 'evaluation/episode.return': 4797.673815467303, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5710113286972046, 'evaluation/final.reward_linvel': 0.510312057532499, 'evaluation/final.reward_quadctrl': -0.7127912134392341, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.323747492449955, 'evaluation/final.y_position': -0.5910562929928971, 'evaluation/final.distance_from_origin': 6.363778805916777, 'evaluation/final.x_velocity': 0.40824964602599917, 'evaluation/final.y_velocity': -0.07315096555

 69%|██████▉   | 690111/1000000 [18:16<39:52, 129.53it/s]

{'evaluation/reward_linvel': 0.498920155917828, 'evaluation/reward_quadctrl': -0.6734058106516749, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.7610034063006736, 'evaluation/y_position': -0.3214207955575963, 'evaluation/distance_from_origin': 2.8018269495182873, 'evaluation/x_velocity': 0.3991361247342625, 'evaluation/y_velocity': -0.05664226263760389, 'evaluation/forward_reward': 0.498920155917828, 'evaluation/total.timesteps': 434958.5, 'evaluation/episode.return': 4825.5143452661505, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5786554098129273, 'evaluation/final.reward_linvel': 0.5495089065322627, 'evaluation/final.reward_quadctrl': -0.5645377121264332, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.00714936714561, 'evaluation/final.y_position': -0.8469212700812594, 'evaluation/final.distance_from_origin': 6.0866645235893655, 'evaluation/final.x_velocity': 0.4396071252258101, 'evaluation/final.y_velocity': -0.0744384450

 70%|███████   | 700132/1000000 [18:35<40:32, 123.29it/s]

{'evaluation/reward_linvel': 0.5681685806936014, 'evaluation/reward_quadctrl': -0.6449012070150391, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.23286368146529, 'evaluation/y_position': -0.9693829298992583, 'evaluation/distance_from_origin': 3.382239626312768, 'evaluation/x_velocity': 0.4545348645548811, 'evaluation/y_velocity': -0.13301565228438703, 'evaluation/forward_reward': 0.5681685806936014, 'evaluation/total.timesteps': 444958.5, 'evaluation/episode.return': 4923.267373678562, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5809517621994018, 'evaluation/final.reward_linvel': 0.5916297093158478, 'evaluation/final.reward_quadctrl': -0.5748489346984544, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.831988267077442, 'evaluation/final.y_position': -1.9975908074202597, 'evaluation/final.distance_from_origin': 7.124245681078344, 'evaluation/final.x_velocity': 0.47330376745267816, 'evaluation/final.y_velocity': -0.16893878324

 71%|███████   | 710127/1000000 [18:52<31:47, 152.00it/s]

{'evaluation/reward_linvel': 0.5814560370180691, 'evaluation/reward_quadctrl': -0.707523306180671, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.2159926016332423, 'evaluation/y_position': -0.5341997240870998, 'evaluation/distance_from_origin': 3.2731566719512495, 'evaluation/x_velocity': 0.4651648296144553, 'evaluation/y_velocity': -0.07559058660437831, 'evaluation/forward_reward': 0.5814560370180691, 'evaluation/total.timesteps': 454395.0, 'evaluation/episode.return': 4324.640512072025, 'evaluation/episode.length': 887.3, 'evaluation/episode.duration': 0.49519107341766355, 'evaluation/final.reward_linvel': 0.6074800802594483, 'evaluation/final.reward_quadctrl': -0.6617047090676198, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.208451995475815, 'evaluation/final.y_position': -1.0029274085118132, 'evaluation/final.distance_from_origin': 6.297685321105883, 'evaluation/final.x_velocity': 0.4859840642075586, 'evaluation/final.y_velocity': -0.0177309363

 72%|███████▏  | 720105/1000000 [19:11<35:41, 130.69it/s]

{'evaluation/reward_linvel': 0.6077046383739483, 'evaluation/reward_quadctrl': -0.6942150058404395, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.338522365222299, 'evaluation/y_position': -1.2305150013940498, 'evaluation/distance_from_origin': 3.5767400708609185, 'evaluation/x_velocity': 0.4861637106991586, 'evaluation/y_velocity': -0.1809457983735016, 'evaluation/forward_reward': 0.6077046383739483, 'evaluation/total.timesteps': 463732.0, 'evaluation/episode.return': 4815.711188846091, 'evaluation/episode.length': 980.1, 'evaluation/episode.duration': 0.5697633981704712, 'evaluation/final.reward_linvel': 0.782525957751649, 'evaluation/final.reward_quadctrl': -0.7411208209145135, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.159283088916913, 'evaluation/final.y_position': -2.661904888527206, 'evaluation/final.distance_from_origin': 7.649465644642731, 'evaluation/final.x_velocity': 0.6260207662013192, 'evaluation/final.y_velocity': -0.21991485616322

 73%|███████▎  | 730158/1000000 [19:29<32:15, 139.40it/s]

{'evaluation/reward_linvel': 0.4615566110107425, 'evaluation/reward_quadctrl': -0.6571427422162327, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.4776641933634735, 'evaluation/y_position': -0.09921083266718646, 'evaluation/distance_from_origin': 2.502498568996209, 'evaluation/x_velocity': 0.369245288808594, 'evaluation/y_velocity': -0.019028825584130635, 'evaluation/forward_reward': 0.4615566110107425, 'evaluation/total.timesteps': 473201.5, 'evaluation/episode.return': 4390.273393304424, 'evaluation/episode.length': 913.8, 'evaluation/episode.duration': 0.5264890909194946, 'evaluation/final.reward_linvel': 0.6609038763116711, 'evaluation/final.reward_quadctrl': -0.7907478224578812, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.075325756307729, 'evaluation/final.y_position': -0.2602898647669767, 'evaluation/final.distance_from_origin': 5.114955060237103, 'evaluation/final.x_velocity': 0.528723101049337, 'evaluation/final.y_velocity': -0.20093487160

 74%|███████▍  | 740097/1000000 [19:46<32:13, 134.41it/s]

{'evaluation/reward_linvel': 0.5117066987351754, 'evaluation/reward_quadctrl': -0.739451339382609, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.802373659078516, 'evaluation/y_position': -0.5753018043864906, 'evaluation/distance_from_origin': 2.88869276353061, 'evaluation/x_velocity': 0.40936535898814025, 'evaluation/y_velocity': -0.08068115869072946, 'evaluation/forward_reward': 0.5117066987351754, 'evaluation/total.timesteps': 482429.0, 'evaluation/episode.return': 4446.310318308786, 'evaluation/episode.length': 931.7, 'evaluation/episode.duration': 0.5424061059951782, 'evaluation/final.reward_linvel': 0.6763550195998465, 'evaluation/final.reward_quadctrl': -0.7161038774929213, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.7350215048944495, 'evaluation/final.y_position': -1.1306755107372046, 'evaluation/final.distance_from_origin': 5.88076710468699, 'evaluation/final.x_velocity': 0.5410840156798773, 'evaluation/final.y_velocity': -0.0953637168924

 75%|███████▌  | 750108/1000000 [20:05<31:16, 133.17it/s]

{'evaluation/reward_linvel': 0.643817446362777, 'evaluation/reward_quadctrl': -0.7365635538108972, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.7115211986550283, 'evaluation/y_position': -0.8939069039739467, 'evaluation/distance_from_origin': 3.826146382240363, 'evaluation/x_velocity': 0.5150539570902216, 'evaluation/y_velocity': -0.13726638207705547, 'evaluation/forward_reward': 0.643817446362777, 'evaluation/total.timesteps': 491858.5, 'evaluation/episode.return': 4682.501664273002, 'evaluation/episode.length': 954.2, 'evaluation/episode.duration': 0.5414072751998902, 'evaluation/final.reward_linvel': 0.6240000679506216, 'evaluation/final.reward_quadctrl': -0.837949415612915, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.3879374981837085, 'evaluation/final.y_position': -1.9657773503673721, 'evaluation/final.distance_from_origin': 7.654290829337315, 'evaluation/final.x_velocity': 0.49920005436049736, 'evaluation/final.y_velocity': -0.198968719497

 76%|███████▌  | 760121/1000000 [20:23<29:23, 136.05it/s]

{'evaluation/reward_linvel': 0.5601566537772343, 'evaluation/reward_quadctrl': -0.7057007266135914, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.1667480913929142, 'evaluation/y_position': -1.5025859144986382, 'evaluation/distance_from_origin': 3.5122881521736753, 'evaluation/x_velocity': 0.4481253230217875, 'evaluation/y_velocity': -0.2174250808870454, 'evaluation/forward_reward': 0.5601566537772343, 'evaluation/total.timesteps': 501291.0, 'evaluation/episode.return': 4525.809260894663, 'evaluation/episode.length': 932.3, 'evaluation/episode.duration': 0.5401992797851562, 'evaluation/final.reward_linvel': 0.64615435474605, 'evaluation/final.reward_quadctrl': -0.7912393757058326, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.284230997318427, 'evaluation/final.y_position': -3.0400362326401384, 'evaluation/final.distance_from_origin': 6.9967725442812965, 'evaluation/final.x_velocity': 0.5169234837968399, 'evaluation/final.y_velocity': -0.277510955207

 77%|███████▋  | 770139/1000000 [20:41<29:31, 129.78it/s]

{'evaluation/reward_linvel': 0.681591775358878, 'evaluation/reward_quadctrl': -0.7275739556364778, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.784063781114046, 'evaluation/y_position': -0.8290584107253292, 'evaluation/distance_from_origin': 3.8967392617914447, 'evaluation/x_velocity': 0.5452734202871024, 'evaluation/y_velocity': -0.1357928563306513, 'evaluation/forward_reward': 0.681591775358878, 'evaluation/total.timesteps': 510790.5, 'evaluation/episode.return': 4793.507642363393, 'evaluation/episode.length': 967.6, 'evaluation/episode.duration': 0.5613776922225953, 'evaluation/final.reward_linvel': 0.8858489498695536, 'evaluation/final.reward_quadctrl': -0.7740162748120354, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.931158292566813, 'evaluation/final.y_position': -1.9663246060381472, 'evaluation/final.distance_from_origin': 8.207292850353983, 'evaluation/final.x_velocity': 0.7086791598956429, 'evaluation/final.y_velocity': -0.23697223211787

 78%|███████▊  | 780134/1000000 [21:00<28:41, 127.71it/s]

{'evaluation/reward_linvel': 0.5676142252476659, 'evaluation/reward_quadctrl': -0.6587606880212494, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.137893691632457, 'evaluation/y_position': -0.66435812200273, 'evaluation/distance_from_origin': 3.244408734502733, 'evaluation/x_velocity': 0.4540913801981326, 'evaluation/y_velocity': -0.10129354556519517, 'evaluation/forward_reward': 0.5676142252476659, 'evaluation/total.timesteps': 520547.0, 'evaluation/episode.return': 4828.839224569622, 'evaluation/episode.length': 983.7, 'evaluation/episode.duration': 0.5779303312301636, 'evaluation/final.reward_linvel': 0.6318657583391637, 'evaluation/final.reward_quadctrl': -0.6303885101742607, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.714754527277114, 'evaluation/final.y_position': -1.4911648739289147, 'evaluation/final.distance_from_origin': 6.9273252717898, 'evaluation/final.x_velocity': 0.505492606671331, 'evaluation/final.y_velocity': -0.15579606760779016

 79%|███████▉  | 790098/1000000 [21:18<27:08, 128.88it/s]

{'evaluation/reward_linvel': 0.4958035734990517, 'evaluation/reward_quadctrl': -0.6893588561015868, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.757327405220139, 'evaluation/y_position': -0.8156465050407776, 'evaluation/distance_from_origin': 2.8799384023895316, 'evaluation/x_velocity': 0.3966428587992413, 'evaluation/y_velocity': -0.12069231834593637, 'evaluation/forward_reward': 0.4958035734990517, 'evaluation/total.timesteps': 530372.5, 'evaluation/episode.return': 4717.044845653872, 'evaluation/episode.length': 981.4, 'evaluation/episode.duration': 0.5747812747955322, 'evaluation/final.reward_linvel': 0.6416281814384399, 'evaluation/final.reward_quadctrl': -0.7735979967536808, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.853211234290773, 'evaluation/final.y_position': -1.7769314383822608, 'evaluation/final.distance_from_origin': 6.130557237480924, 'evaluation/final.x_velocity': 0.5133025451507518, 'evaluation/final.y_velocity': -0.19807310737

 80%|████████  | 800107/1000000 [21:36<23:42, 140.50it/s]

{'evaluation/reward_linvel': 0.6500740513969809, 'evaluation/reward_quadctrl': -0.6780118481865777, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.632833402064997, 'evaluation/y_position': -0.7455081175199252, 'evaluation/distance_from_origin': 3.7185045628010167, 'evaluation/x_velocity': 0.5200592411175848, 'evaluation/y_velocity': -0.10555047254762212, 'evaluation/forward_reward': 0.6500740513969809, 'evaluation/total.timesteps': 540159.0, 'evaluation/episode.return': 4852.235504113033, 'evaluation/episode.length': 975.9, 'evaluation/episode.duration': 0.5487128734588623, 'evaluation/final.reward_linvel': 0.7273146803050097, 'evaluation/final.reward_quadctrl': -0.7087399306324893, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.633448635345596, 'evaluation/final.y_position': -1.5433084906249477, 'evaluation/final.distance_from_origin': 7.814965200236041, 'evaluation/final.x_velocity': 0.5818517442440079, 'evaluation/final.y_velocity': -0.13878417186

 81%|████████  | 810126/1000000 [21:55<24:39, 128.34it/s]

{'evaluation/reward_linvel': 0.6658704550057607, 'evaluation/reward_quadctrl': -0.6542990857362619, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.814834254061203, 'evaluation/y_position': -1.0324192550789766, 'evaluation/distance_from_origin': 3.9573999643530087, 'evaluation/x_velocity': 0.5326963640046084, 'evaluation/y_velocity': -0.14351058421725044, 'evaluation/forward_reward': 0.6658704550057607, 'evaluation/total.timesteps': 550038.5, 'evaluation/episode.return': 5011.571369269501, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5815248727798462, 'evaluation/final.reward_linvel': 0.705986069944727, 'evaluation/final.reward_quadctrl': -0.6988242109836842, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 8.011061558275893, 'evaluation/final.y_position': -2.1557731228608796, 'evaluation/final.distance_from_origin': 8.29693359863979, 'evaluation/final.x_velocity': 0.5647888559557817, 'evaluation/final.y_velocity': -0.105404119516

 82%|████████▏ | 820084/1000000 [22:13<22:59, 130.46it/s]

{'evaluation/reward_linvel': 0.6069056383157811, 'evaluation/reward_quadctrl': -0.7033411374806482, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.442711035948216, 'evaluation/y_position': -0.5993794488496241, 'evaluation/distance_from_origin': 3.511032013133837, 'evaluation/x_velocity': 0.48552451065262486, 'evaluation/y_velocity': -0.07674960592678798, 'evaluation/forward_reward': 0.6069056383157811, 'evaluation/total.timesteps': 560038.5, 'evaluation/episode.return': 4903.564500835135, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.571349024772644, 'evaluation/final.reward_linvel': 0.697781207883419, 'evaluation/final.reward_quadctrl': -0.6995650270033656, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.299181753571091, 'evaluation/final.y_position': -1.1512762204198972, 'evaluation/final.distance_from_origin': 7.408012495787919, 'evaluation/final.x_velocity': 0.5582249663067351, 'evaluation/final.y_velocity': -0.017041327749

 83%|████████▎ | 830155/1000000 [22:31<21:10, 133.64it/s]

{'evaluation/reward_linvel': 0.6843344293799662, 'evaluation/reward_quadctrl': -0.6958041293489545, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.8914270927604626, 'evaluation/y_position': -1.0590493256915146, 'evaluation/distance_from_origin': 4.041790962771142, 'evaluation/x_velocity': 0.5474675435039729, 'evaluation/y_velocity': -0.15328122186023965, 'evaluation/forward_reward': 0.6843344293799662, 'evaluation/total.timesteps': 569992.0, 'evaluation/episode.return': 4942.136968240723, 'evaluation/episode.length': 990.7, 'evaluation/episode.duration': 0.5692059993743896, 'evaluation/final.reward_linvel': 0.7670618588843178, 'evaluation/final.reward_quadctrl': -0.6394776135950295, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 8.150962613309833, 'evaluation/final.y_position': -2.2781703315963595, 'evaluation/final.distance_from_origin': 8.470944096426766, 'evaluation/final.x_velocity': 0.6136494871074543, 'evaluation/final.y_velocity': -0.19589047949

 84%|████████▍ | 840147/1000000 [22:50<19:23, 137.33it/s]

{'evaluation/reward_linvel': 0.5191366522430712, 'evaluation/reward_quadctrl': -0.6749214529002975, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.853908130074172, 'evaluation/y_position': -0.5068358255131373, 'evaluation/distance_from_origin': 2.9099908553235894, 'evaluation/x_velocity': 0.415309321794457, 'evaluation/y_velocity': -0.07078365827607194, 'evaluation/forward_reward': 0.5191366522430712, 'evaluation/total.timesteps': 579945.5, 'evaluation/episode.return': 4844.215199342774, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5543270111083984, 'evaluation/final.reward_linvel': 0.5978048731377713, 'evaluation/final.reward_quadctrl': -0.6070363271536064, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.246409724285346, 'evaluation/final.y_position': -1.0625919403646094, 'evaluation/final.distance_from_origin': 6.3516079707607425, 'evaluation/final.x_velocity': 0.47824389851021704, 'evaluation/final.y_velocity': -0.066104983

 85%|████████▌ | 850122/1000000 [23:07<18:35, 134.39it/s]

{'evaluation/reward_linvel': 0.4885707154614732, 'evaluation/reward_quadctrl': -0.6742345363560159, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.6785037434161167, 'evaluation/y_position': -0.41452849709150924, 'evaluation/distance_from_origin': 2.7229111295891086, 'evaluation/x_velocity': 0.3908565723691785, 'evaluation/y_velocity': -0.056611733658156624, 'evaluation/forward_reward': 0.4885707154614732, 'evaluation/total.timesteps': 589568.5, 'evaluation/episode.return': 4451.335231200908, 'evaluation/episode.length': 924.6, 'evaluation/episode.duration': 0.5362742185592652, 'evaluation/final.reward_linvel': 0.6636270821560116, 'evaluation/final.reward_quadctrl': -0.7992195105003698, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.4358078049125735, 'evaluation/final.y_position': -0.7817380641803464, 'evaluation/final.distance_from_origin': 5.5077094128256965, 'evaluation/final.x_velocity': 0.5309016657248093, 'evaluation/final.y_velocity': -0.249267

 86%|████████▌ | 860156/1000000 [23:26<18:25, 126.52it/s]

{'evaluation/reward_linvel': 0.6186950221733687, 'evaluation/reward_quadctrl': -0.70256219667696, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.5357854186950126, 'evaluation/y_position': -1.1134739709651402, 'evaluation/distance_from_origin': 3.7209632261696206, 'evaluation/x_velocity': 0.49495601773869485, 'evaluation/y_velocity': -0.15970493941080713, 'evaluation/forward_reward': 0.6186950221733687, 'evaluation/total.timesteps': 599191.5, 'evaluation/episode.return': 4916.132825496409, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5894077062606812, 'evaluation/final.reward_linvel': 0.658287365653935, 'evaluation/final.reward_quadctrl': -0.6413174718754359, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.438204268861213, 'evaluation/final.y_position': -2.396187988154983, 'evaluation/final.distance_from_origin': 7.822896545986032, 'evaluation/final.x_velocity': 0.5266298925231478, 'evaluation/final.y_velocity': -0.181921445077

 87%|████████▋ | 870138/1000000 [23:44<14:34, 148.52it/s]

{'evaluation/reward_linvel': 0.6413540583830933, 'evaluation/reward_quadctrl': -0.7338878896527535, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.2904777284644706, 'evaluation/y_position': -0.24692996510978074, 'evaluation/distance_from_origin': 3.35351093246593, 'evaluation/x_velocity': 0.5130832467064745, 'evaluation/y_velocity': -0.0439878828442412, 'evaluation/forward_reward': 0.6413540583830933, 'evaluation/total.timesteps': 608513.0, 'evaluation/episode.return': 4241.523009633633, 'evaluation/episode.length': 864.3, 'evaluation/episode.duration': 0.4819836378097534, 'evaluation/final.reward_linvel': 1.209134062978364, 'evaluation/final.reward_quadctrl': -0.9194503323163212, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.665531747358907, 'evaluation/final.y_position': -0.5697336236430839, 'evaluation/final.distance_from_origin': 6.721952318235755, 'evaluation/final.x_velocity': 0.967307250382691, 'evaluation/final.y_velocity': -0.06899022564652

 88%|████████▊ | 880117/1000000 [24:02<14:56, 133.79it/s]

{'evaluation/reward_linvel': 0.5632781164367542, 'evaluation/reward_quadctrl': -0.6738591340825675, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.1084628654994004, 'evaluation/y_position': -0.26127890542274873, 'evaluation/distance_from_origin': 3.1331729459930777, 'evaluation/x_velocity': 0.4506224931494033, 'evaluation/y_velocity': -0.043784714948655885, 'evaluation/forward_reward': 0.5632781164367542, 'evaluation/total.timesteps': 617645.0, 'evaluation/episode.return': 4704.110002922962, 'evaluation/episode.length': 962.1, 'evaluation/episode.duration': 0.5617257118225097, 'evaluation/final.reward_linvel': 0.6340168105384774, 'evaluation/final.reward_quadctrl': -0.7842074020714404, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.517863143750079, 'evaluation/final.y_position': -0.6326418164625179, 'evaluation/final.distance_from_origin': 6.565859681175411, 'evaluation/final.x_velocity': 0.5072134484307819, 'evaluation/final.y_velocity': -0.01490097

 89%|████████▉ | 890079/1000000 [24:20<14:44, 124.31it/s]

{'evaluation/reward_linvel': 0.587282782853396, 'evaluation/reward_quadctrl': -0.7125195138841642, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.3089505159890282, 'evaluation/y_position': -1.737619283312945, 'evaluation/distance_from_origin': 3.7527859270873645, 'evaluation/x_velocity': 0.46982622628271675, 'evaluation/y_velocity': -0.24520335730183282, 'evaluation/forward_reward': 0.587282782853396, 'evaluation/total.timesteps': 627181.5, 'evaluation/episode.return': 4607.626241829717, 'evaluation/episode.length': 945.2, 'evaluation/episode.duration': 0.5450111389160156, 'evaluation/final.reward_linvel': 0.5234188109456379, 'evaluation/final.reward_quadctrl': -0.7446263717544077, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.675678796568272, 'evaluation/final.y_position': -3.478860519015625, 'evaluation/final.distance_from_origin': 7.565014287337225, 'evaluation/final.x_velocity': 0.4187350487565104, 'evaluation/final.y_velocity': -0.2840155514107

 90%|█████████ | 900092/1000000 [24:37<10:31, 158.32it/s]

{'evaluation/reward_linvel': 0.5860862553826716, 'evaluation/reward_quadctrl': -0.7153197012871844, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.144876167417316, 'evaluation/y_position': -0.6651734995638483, 'evaluation/distance_from_origin': 3.2364237273092202, 'evaluation/x_velocity': 0.4688690043061372, 'evaluation/y_velocity': -0.10864487080438022, 'evaluation/forward_reward': 0.5860862553826716, 'evaluation/total.timesteps': 636223.0, 'evaluation/episode.return': 4203.958612839816, 'evaluation/episode.length': 863.1, 'evaluation/episode.duration': 0.4857943534851074, 'evaluation/final.reward_linvel': 0.715877740236302, 'evaluation/final.reward_quadctrl': -0.7544284039113002, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.086357996378491, 'evaluation/final.y_position': -1.408532851135082, 'evaluation/final.distance_from_origin': 6.288601193782656, 'evaluation/final.x_velocity': 0.5727021921890414, 'evaluation/final.y_velocity': -0.2502345037262

 91%|█████████ | 910146/1000000 [24:56<12:41, 117.96it/s]

{'evaluation/reward_linvel': 0.28794526368651896, 'evaluation/reward_quadctrl': -0.6817200435198659, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 1.7157768833042935, 'evaluation/y_position': 0.2659055686107182, 'evaluation/distance_from_origin': 1.7591108477598694, 'evaluation/x_velocity': 0.23035621094921516, 'evaluation/y_velocity': 0.03413247367248077, 'evaluation/forward_reward': 0.28794526368651896, 'evaluation/total.timesteps': 645538.5, 'evaluation/episode.return': 4606.225220166654, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.6167800188064575, 'evaluation/final.reward_linvel': 0.2814131193131157, 'evaluation/final.reward_quadctrl': -0.6725145136103868, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 3.472031864158121, 'evaluation/final.y_position': 0.5111057371630429, 'evaluation/final.distance_from_origin': 3.5309666100189228, 'evaluation/final.x_velocity': 0.22513049545049252, 'evaluation/final.y_velocity': 0.06349018

 92%|█████████▏| 920107/1000000 [25:14<09:38, 138.10it/s]

{'evaluation/reward_linvel': 0.4641335544015093, 'evaluation/reward_quadctrl': -0.6444934557936218, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.640416898746692, 'evaluation/y_position': -0.12198909784960557, 'evaluation/distance_from_origin': 2.6562961438527894, 'evaluation/x_velocity': 0.37130684352120735, 'evaluation/y_velocity': -0.01122652499760083, 'evaluation/forward_reward': 0.4641335544015093, 'evaluation/total.timesteps': 655084.0, 'evaluation/episode.return': 4381.534813644432, 'evaluation/episode.length': 909.1, 'evaluation/episode.duration': 0.5259582281112671, 'evaluation/final.reward_linvel': 0.39781331181645413, 'evaluation/final.reward_quadctrl': -0.6619520565623926, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.080429557233691, 'evaluation/final.y_position': -0.15153268071715206, 'evaluation/final.distance_from_origin': 5.147568431445303, 'evaluation/final.x_velocity': 0.3182506494531633, 'evaluation/final.y_velocity': -0.0531110

 93%|█████████▎| 930096/1000000 [25:32<08:29, 137.18it/s]

{'evaluation/reward_linvel': 0.5922406396363985, 'evaluation/reward_quadctrl': -0.6823396242577063, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.3345385569903083, 'evaluation/y_position': -0.7607302929566652, 'evaluation/distance_from_origin': 3.4325975156003428, 'evaluation/x_velocity': 0.4737925117091188, 'evaluation/y_velocity': -0.108944119399836, 'evaluation/forward_reward': 0.5922406396363985, 'evaluation/total.timesteps': 664437.5, 'evaluation/episode.return': 4721.360816388149, 'evaluation/episode.length': 961.6, 'evaluation/episode.duration': 0.5526026964187623, 'evaluation/final.reward_linvel': 0.7164049328812317, 'evaluation/final.reward_quadctrl': -0.7414257709476212, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.850180088917716, 'evaluation/final.y_position': -1.573878872846008, 'evaluation/final.distance_from_origin': 7.045937214930018, 'evaluation/final.x_velocity': 0.5731239463049853, 'evaluation/final.y_velocity': -0.1408402135437

 94%|█████████▍| 940094/1000000 [25:50<07:28, 133.58it/s]

{'evaluation/reward_linvel': 0.623891366498171, 'evaluation/reward_quadctrl': -0.7132588625761607, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.4962651137389567, 'evaluation/y_position': -1.2027826028821895, 'evaluation/distance_from_origin': 3.710802871242097, 'evaluation/x_velocity': 0.49911309319853675, 'evaluation/y_velocity': -0.16028540505329447, 'evaluation/forward_reward': 0.623891366498171, 'evaluation/total.timesteps': 674079.0, 'evaluation/episode.return': 4747.10844154141, 'evaluation/episode.length': 966.7, 'evaluation/episode.duration': 0.5512903213500977, 'evaluation/final.reward_linvel': 0.6267438455696513, 'evaluation/final.reward_quadctrl': -0.7405031400222448, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 7.255135026776666, 'evaluation/final.y_position': -2.3220711464155572, 'evaluation/final.distance_from_origin': 7.638178092786331, 'evaluation/final.x_velocity': 0.5013950764557211, 'evaluation/final.y_velocity': -0.1310771685339

 95%|█████████▌| 950099/1000000 [26:08<06:34, 126.61it/s]

{'evaluation/reward_linvel': 0.4677883409319062, 'evaluation/reward_quadctrl': -0.6675091151145791, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.683664029439982, 'evaluation/y_position': -0.5006699140418546, 'evaluation/distance_from_origin': 2.748013180723555, 'evaluation/x_velocity': 0.37423067274552485, 'evaluation/y_velocity': -0.07529956287727861, 'evaluation/forward_reward': 0.4677883409319062, 'evaluation/total.timesteps': 683912.5, 'evaluation/episode.return': 4800.279225817327, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5869668006896973, 'evaluation/final.reward_linvel': 0.4325679296524573, 'evaluation/final.reward_quadctrl': -0.7545731334791779, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.626669624496128, 'evaluation/final.y_position': -1.133433418646879, 'evaluation/final.distance_from_origin': 5.757939652217617, 'evaluation/final.x_velocity': 0.34605434372196575, 'evaluation/final.y_velocity': -0.0332745075

 96%|█████████▌| 960116/1000000 [26:26<04:41, 141.79it/s]

{'evaluation/reward_linvel': 0.5922003076487901, 'evaluation/reward_quadctrl': -0.6771020492098585, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 3.164095146444788, 'evaluation/y_position': -0.7853025164778343, 'evaluation/distance_from_origin': 3.269804628076434, 'evaluation/x_velocity': 0.47376024611903206, 'evaluation/y_velocity': -0.10579139919599066, 'evaluation/forward_reward': 0.5922003076487901, 'evaluation/total.timesteps': 693564.5, 'evaluation/episode.return': 4573.007419651582, 'evaluation/episode.length': 930.4, 'evaluation/episode.duration': 0.5349241733551026, 'evaluation/final.reward_linvel': 0.754600179341806, 'evaluation/final.reward_quadctrl': -0.6046655165961431, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 6.624958528075544, 'evaluation/final.y_position': -1.4759446706314985, 'evaluation/final.distance_from_origin': 6.793043523971029, 'evaluation/final.x_velocity': 0.6036801434734448, 'evaluation/final.y_velocity': -0.085905701396

 97%|█████████▋| 970103/1000000 [26:45<03:54, 127.23it/s]

{'evaluation/reward_linvel': 0.4349681239793837, 'evaluation/reward_quadctrl': -0.6545330491136341, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.469847998787307, 'evaluation/y_position': -0.4080572460730629, 'evaluation/distance_from_origin': 2.517467105651454, 'evaluation/x_velocity': 0.34797449918350704, 'evaluation/y_velocity': -0.05253664488168986, 'evaluation/forward_reward': 0.4349681239793837, 'evaluation/total.timesteps': 703216.5, 'evaluation/episode.return': 4780.43507486575, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5781773090362549, 'evaluation/final.reward_linvel': 0.4295195245307563, 'evaluation/final.reward_quadctrl': -0.6658524806705669, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 5.236952080690015, 'evaluation/final.y_position': -0.7892100006205824, 'evaluation/final.distance_from_origin': 5.320105573294409, 'evaluation/final.x_velocity': 0.3436156196246051, 'evaluation/final.y_velocity': -0.09351594738

 98%|█████████▊| 980084/1000000 [27:03<02:38, 125.88it/s]

{'evaluation/reward_linvel': 0.39272573899933566, 'evaluation/reward_quadctrl': -0.6235798508294409, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.248542166535392, 'evaluation/y_position': -0.46149568121050816, 'evaluation/distance_from_origin': 2.3024062565462535, 'evaluation/x_velocity': 0.3141805911994685, 'evaluation/y_velocity': -0.056490118221905854, 'evaluation/forward_reward': 0.39272573899933566, 'evaluation/total.timesteps': 713216.5, 'evaluation/episode.return': 4769.145888169894, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5884100437164307, 'evaluation/final.reward_linvel': 0.4321127350138122, 'evaluation/final.reward_quadctrl': -0.5670292630036305, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.722133960184426, 'evaluation/final.y_position': -0.8481899082279114, 'evaluation/final.distance_from_origin': 4.805484135738242, 'evaluation/final.x_velocity': 0.3456901880110497, 'evaluation/final.y_velocity': 0.0123371

 99%|█████████▉| 990157/1000000 [27:20<00:50, 195.41it/s]

{'evaluation/reward_linvel': 0.6929311979034711, 'evaluation/reward_quadctrl': -0.8220111701337677, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.728453971210801, 'evaluation/y_position': -0.26720033192523135, 'evaluation/distance_from_origin': 2.7636289635624403, 'evaluation/x_velocity': 0.5543449583227769, 'evaluation/y_velocity': -0.07095295192075414, 'evaluation/forward_reward': 0.6929311979034711, 'evaluation/total.timesteps': 721208.0, 'evaluation/episode.return': 2914.2714526146133, 'evaluation/episode.length': 598.3, 'evaluation/episode.duration': 0.3393065929412842, 'evaluation/final.reward_linvel': 1.518330410689876, 'evaluation/final.reward_quadctrl': -1.096188100540254, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.994428248688161, 'evaluation/final.y_position': -0.6320540252153176, 'evaluation/final.distance_from_origin': 5.063851228615454, 'evaluation/final.x_velocity': 1.2146643285519008, 'evaluation/final.y_velocity': -0.16777393081

100%|██████████| 1000000/1000000 [27:38<00:00, 603.08it/s]

{'evaluation/reward_linvel': 0.40464280880038034, 'evaluation/reward_quadctrl': -0.637901392495868, 'evaluation/reward_alive': 5.0, 'evaluation/x_position': 2.421232292415809, 'evaluation/y_position': 0.04348681162768253, 'evaluation/distance_from_origin': 2.435754818853999, 'evaluation/x_velocity': 0.32371424704030427, 'evaluation/y_velocity': 0.00988794390738605, 'evaluation/forward_reward': 0.40464280880038034, 'evaluation/total.timesteps': 729199.5, 'evaluation/episode.return': 4766.741416304513, 'evaluation/episode.length': 1000.0, 'evaluation/episode.duration': 0.5694445371627808, 'evaluation/final.reward_linvel': 0.4173464920448233, 'evaluation/final.reward_quadctrl': -0.5976481763812167, 'evaluation/final.reward_alive': 5.0, 'evaluation/final.x_position': 4.8709326853409864, 'evaluation/final.y_position': 0.15007856592094318, 'evaluation/final.distance_from_origin': 4.8955581274684485, 'evaluation/final.x_velocity': 0.33387719363585866, 'evaluation/final.y_velocity': 0.01638288


