In [2]:


import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

from stable_baselines3 import SAC
from stable_baselines3.sac.policies import SACPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.logger import configure


class EpisodicRewardWrapper(gym.RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.cumulative_reward = 0.0

    def step(self, action):
        result = self.env.step(action)
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            obs, reward, done, info = result
        self.cumulative_reward += reward
        if done:
            reward = self.cumulative_reward
            self.cumulative_reward = 0.0
        else:
            reward = 0.0
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.cumulative_reward = 0.0
        result = self.env.reset(**kwargs)
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        return obs


class TrajectoryReplay:
    def __init__(self):
        self.trajectories = []

    def add_trajectory(self, traj):
        self.trajectories.append(traj)

    def sample(self, batch_size):

        indices = np.random.randint(0, len(self.trajectories), size=batch_size)
        return [self.trajectories[i] for i in indices]

    def __len__(self):
        return len(self.trajectories)


class RewardModel(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_size=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + action_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, s, a):
        x = torch.cat([s, a], dim=-1)
        return self.net(x)


def sample_subsequence_indices(traj_length, K):
    if K > traj_length:
        K = traj_length
    return np.random.choice(traj_length, size=K, replace=False)

def train_reward_model_rrd(reward_model, optimizer, trajectories, K=64, device='cpu'):
    reward_model.train()
    M = len(trajectories)

    loss_sum = 0.0
    for traj in trajectories:
        R_ep = sum([t[2] for t in traj])
        s_list = []
        a_list = []
        for (s, a, r, s_next, done) in traj:
            s_list.append(s)
            a_list.append(a)

        states = torch.tensor(s_list, dtype=torch.float, device=device)
        actions = torch.tensor(a_list, dtype=torch.float, device=device)

        Tj = len(traj)
        idx_subseq = sample_subsequence_indices(Tj, K)

        R_sa_sub = reward_model(states[idx_subseq], actions[idx_subseq])
        sum_R_sa_sub = R_sa_sub.sum()

        diff = (R_ep - sum_R_sa_sub) ** 2
        loss_sum += diff

    loss = loss_sum / M
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

def collect_episodes(env, model, n_episodes, device='cpu'):
    trajectories = []
    for _ in range(n_episodes):
        result = env.reset()
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        done = False
        traj = []
        while not done:
            if obs is None or (hasattr(obs, '__len__') and len(obs) == 0):
                raise ValueError("Observation is empty, check env.reset() output")
            obs_tensor = torch.tensor(obs, dtype=torch.float, device=device).unsqueeze(0)
            with torch.no_grad():
                action, _states = model.predict(obs_tensor.cpu().numpy(), deterministic=False)
            result = env.step(action[0])
            if len(result) == 5:
                next_obs, reward, terminated, truncated, info = result
                done = terminated or truncated
            else:
                next_obs, reward, done, info = result
            traj.append((obs, action[0], reward, next_obs, done))
            obs = next_obs
        trajectories.append(traj)
    return trajectories

def add_shaped_transitions_to_replay(model, reward_model, trajectories, gamma=0.99, device='cpu'):
    for traj in trajectories:
        states = []
        actions = []
        next_states = []
        dones = []
        for (s, a, r, s_next, d) in traj:
            states.append(s)
            actions.append(a)
            next_states.append(s_next)
            dones.append(d)

        s_tensor = torch.tensor(states, dtype=torch.float, device=device)
        a_tensor = torch.tensor(actions, dtype=torch.float, device=device)

        with torch.no_grad():
            shaped_rewards = reward_model(s_tensor, a_tensor).cpu().numpy().squeeze(-1)

        for i in range(len(traj)):
            s, a, _, s_next, d = traj[i]
            r_shaped = shaped_rewards[i]
            model.replay_buffer.add(
                s, s_next, a, r_shaped, d, infos=[{}],
            )



def main():
    class Args:
        env = "BipedalWalker-v3"
        episodes = 8000
        steps_per_update = 200
        rrd_k = 64
        device = "cuda"
    
    args = Args()

    device = torch.device(args.device)

    base_env = gym.make(args.env)
    env = EpisodicRewardWrapper(base_env)
    vec_env = DummyVecEnv([lambda: env])

    policy_kwargs = dict(
        net_arch=dict(pi=[64, 64], qf=[64, 64])
    )
    model = SAC(
        policy="MlpPolicy",
        env=vec_env,
        policy_kwargs=policy_kwargs,
        verbose=1,
        seed=42,
        buffer_size=50000,
        learning_starts=0,
        train_freq=1,
        gradient_steps=0,
        batch_size=512,
        gamma=0.99,
        device=device
    )
    model._logger = configure(folder=None, format_strings=["stdout"])

    state_dim = base_env.observation_space.shape[0]
    action_dim = base_env.action_space.shape[0]
    reward_model = RewardModel(state_dim, action_dim).to(device)
    reward_optimizer = optim.Adam(reward_model.parameters(), lr=3e-4)

    traj_replay = TrajectoryReplay()

    ep_record = []
    for ep in tqdm(range(args.episodes)):
        new_trajectories = collect_episodes(env, model, n_episodes=1, device=device)
        for traj in new_trajectories:
            traj_replay.add_trajectory(traj)

        if len(traj_replay) >= 5:
            batch_size_trajectories = 4
            sampled_trajs = traj_replay.sample(batch_size_trajectories)
            rrd_loss = train_reward_model_rrd(
                reward_model, reward_optimizer, sampled_trajs,
                K=args.rrd_k, device=device
            )

        add_shaped_transitions_to_replay(model, reward_model, new_trajectories, device=device)
        if len(traj_replay) >= 5:
            model.train(args.steps_per_update)

        ep_return = sum([t[2] for t in new_trajectories[-1]])
        ep_record.append(ep_return)
        
        if ep % 10 == 0:
            print(f"Episode {ep+1}/{args.episodes}, real env return = {ep_return:.2f}")

    print("Training finished.")
    return ep_record

In [3]:
train1 = main()

Using cuda device


  0%|                                          | 2/8000 [00:00<19:22,  6.88it/s]

Episode 1/8000, real env return = -104.00


  0%|                                       | 11/8000 [00:09<2:30:32,  1.13s/it]

Episode 11/8000, real env return = -98.50


  0%|                                       | 21/8000 [00:28<4:32:59,  2.05s/it]

Episode 21/8000, real env return = -107.64


  0%|▏                                      | 31/8000 [00:45<4:14:59,  1.92s/it]

Episode 31/8000, real env return = -130.64


  1%|▏                                      | 41/8000 [01:01<3:14:19,  1.46s/it]

Episode 41/8000, real env return = -115.43


  1%|▏                                      | 51/8000 [01:13<2:34:02,  1.16s/it]

Episode 51/8000, real env return = -105.65


  1%|▎                                      | 61/8000 [01:27<3:28:03,  1.57s/it]

Episode 61/8000, real env return = -101.45


  1%|▎                                      | 71/8000 [01:41<3:01:40,  1.37s/it]

Episode 71/8000, real env return = -118.49


  1%|▍                                      | 81/8000 [02:06<6:15:45,  2.85s/it]

Episode 81/8000, real env return = -78.28


  1%|▍                                      | 91/8000 [02:33<6:12:23,  2.83s/it]

Episode 91/8000, real env return = -127.93


  1%|▍                                     | 101/8000 [02:59<5:38:35,  2.57s/it]

Episode 101/8000, real env return = -101.66


  1%|▌                                     | 111/8000 [03:16<2:58:13,  1.36s/it]

Episode 111/8000, real env return = -125.16


  2%|▌                                     | 121/8000 [03:28<2:35:34,  1.18s/it]

Episode 121/8000, real env return = -114.61


  2%|▌                                     | 131/8000 [03:39<2:35:26,  1.19s/it]

Episode 131/8000, real env return = -104.13


  2%|▋                                     | 141/8000 [03:51<2:35:00,  1.18s/it]

Episode 141/8000, real env return = -103.31


  2%|▋                                     | 151/8000 [04:05<2:33:23,  1.17s/it]

Episode 151/8000, real env return = -106.17


  2%|▊                                     | 161/8000 [04:22<3:00:49,  1.38s/it]

Episode 161/8000, real env return = -127.30


  2%|▊                                     | 171/8000 [04:35<3:02:11,  1.40s/it]

Episode 171/8000, real env return = -131.43


  2%|▊                                     | 181/8000 [04:56<4:03:06,  1.87s/it]

Episode 181/8000, real env return = -124.33


  2%|▉                                     | 191/8000 [05:12<2:59:19,  1.38s/it]

Episode 191/8000, real env return = -122.16


  3%|▉                                     | 201/8000 [05:24<2:41:11,  1.24s/it]

Episode 201/8000, real env return = -140.05


  3%|█                                     | 211/8000 [05:36<2:32:48,  1.18s/it]

Episode 211/8000, real env return = -128.79


  3%|█                                     | 221/8000 [05:54<2:57:16,  1.37s/it]

Episode 221/8000, real env return = -120.97


  3%|█                                     | 231/8000 [06:06<2:39:46,  1.23s/it]

Episode 231/8000, real env return = -119.22


  3%|█▏                                    | 241/8000 [06:18<2:39:41,  1.23s/it]

Episode 241/8000, real env return = -117.44


  3%|█▏                                    | 251/8000 [06:29<2:23:36,  1.11s/it]

Episode 251/8000, real env return = -114.05


  3%|█▏                                    | 261/8000 [06:41<2:37:00,  1.22s/it]

Episode 261/8000, real env return = -112.52


  3%|█▎                                    | 271/8000 [06:53<2:30:30,  1.17s/it]

Episode 271/8000, real env return = -110.33


  4%|█▎                                    | 281/8000 [07:05<2:30:24,  1.17s/it]

Episode 281/8000, real env return = -114.84


  4%|█▍                                    | 291/8000 [07:17<2:33:51,  1.20s/it]

Episode 291/8000, real env return = -110.83


  4%|█▍                                    | 301/8000 [07:28<2:24:59,  1.13s/it]

Episode 301/8000, real env return = -111.61


  4%|█▍                                    | 311/8000 [07:39<2:25:40,  1.14s/it]

Episode 311/8000, real env return = -104.39


  4%|█▌                                    | 321/8000 [07:51<2:31:09,  1.18s/it]

Episode 321/8000, real env return = -103.43


  4%|█▌                                    | 331/8000 [08:03<2:27:58,  1.16s/it]

Episode 331/8000, real env return = -101.55


  4%|█▌                                    | 341/8000 [08:15<2:24:20,  1.13s/it]

Episode 341/8000, real env return = -102.47


  4%|█▋                                    | 351/8000 [08:27<2:32:22,  1.20s/it]

Episode 351/8000, real env return = -103.22


  5%|█▋                                    | 361/8000 [08:38<2:19:33,  1.10s/it]

Episode 361/8000, real env return = -101.77


  5%|█▊                                    | 371/8000 [08:50<2:32:15,  1.20s/it]

Episode 371/8000, real env return = -109.76


  5%|█▊                                    | 381/8000 [09:01<2:25:44,  1.15s/it]

Episode 381/8000, real env return = -108.86


  5%|█▊                                    | 391/8000 [09:13<2:25:45,  1.15s/it]

Episode 391/8000, real env return = -108.41


  5%|█▉                                    | 401/8000 [09:24<2:26:06,  1.15s/it]

Episode 401/8000, real env return = -102.77


  5%|█▉                                    | 411/8000 [09:35<2:16:42,  1.08s/it]

Episode 411/8000, real env return = -105.81


  5%|█▉                                    | 421/8000 [09:47<2:22:58,  1.13s/it]

Episode 421/8000, real env return = -102.48


  5%|██                                    | 431/8000 [09:58<2:24:03,  1.14s/it]

Episode 431/8000, real env return = -103.77


  6%|██                                    | 441/8000 [10:09<2:25:27,  1.15s/it]

Episode 441/8000, real env return = -103.65


  6%|██▏                                   | 451/8000 [10:21<2:28:40,  1.18s/it]

Episode 451/8000, real env return = -101.39


  6%|██▏                                   | 461/8000 [10:33<2:36:23,  1.24s/it]

Episode 461/8000, real env return = -105.46


  6%|██▏                                   | 471/8000 [10:44<2:17:26,  1.10s/it]

Episode 471/8000, real env return = -102.02


  6%|██▎                                   | 481/8000 [10:56<2:28:25,  1.18s/it]

Episode 481/8000, real env return = -107.91


  6%|██▎                                   | 491/8000 [11:08<2:21:59,  1.13s/it]

Episode 491/8000, real env return = -107.97


  6%|██▍                                   | 501/8000 [11:19<2:15:10,  1.08s/it]

Episode 501/8000, real env return = -108.71


  6%|██▍                                   | 511/8000 [11:31<2:26:24,  1.17s/it]

Episode 511/8000, real env return = -106.27


  7%|██▍                                   | 521/8000 [11:42<2:13:06,  1.07s/it]

Episode 521/8000, real env return = -107.20


  7%|██▌                                   | 531/8000 [11:53<2:25:29,  1.17s/it]

Episode 531/8000, real env return = -106.50


  7%|██▌                                   | 541/8000 [12:04<2:22:14,  1.14s/it]

Episode 541/8000, real env return = -107.33


  7%|██▌                                   | 551/8000 [12:16<2:21:12,  1.14s/it]

Episode 551/8000, real env return = -106.55


  7%|██▋                                   | 561/8000 [12:28<2:28:04,  1.19s/it]

Episode 561/8000, real env return = -109.68


  7%|██▋                                   | 571/8000 [12:39<2:21:52,  1.15s/it]

Episode 571/8000, real env return = -107.54


  7%|██▊                                   | 581/8000 [12:51<2:22:36,  1.15s/it]

Episode 581/8000, real env return = -107.67


  7%|██▊                                   | 591/8000 [13:03<2:28:59,  1.21s/it]

Episode 591/8000, real env return = -109.37


  8%|██▊                                   | 601/8000 [13:14<2:14:11,  1.09s/it]

Episode 601/8000, real env return = -106.36


  8%|██▉                                   | 611/8000 [13:26<2:24:46,  1.18s/it]

Episode 611/8000, real env return = -110.26


  8%|██▉                                   | 621/8000 [13:37<2:19:50,  1.14s/it]

Episode 621/8000, real env return = -110.37


  8%|██▉                                   | 631/8000 [13:48<2:18:22,  1.13s/it]

Episode 631/8000, real env return = -107.91


  8%|███                                   | 641/8000 [14:00<2:27:36,  1.20s/it]

Episode 641/8000, real env return = -103.78


  8%|███                                   | 651/8000 [14:12<2:22:53,  1.17s/it]

Episode 651/8000, real env return = -107.19


  8%|███▏                                  | 661/8000 [14:23<2:16:06,  1.11s/it]

Episode 661/8000, real env return = -106.18


  8%|███▏                                  | 671/8000 [14:34<2:21:54,  1.16s/it]

Episode 671/8000, real env return = -106.54


  9%|███▏                                  | 681/8000 [14:46<2:24:14,  1.18s/it]

Episode 681/8000, real env return = -107.11


  9%|███▎                                  | 691/8000 [14:58<2:18:22,  1.14s/it]

Episode 691/8000, real env return = -104.81


  9%|███▎                                  | 701/8000 [15:10<2:28:47,  1.22s/it]

Episode 701/8000, real env return = -105.43


  9%|███▍                                  | 711/8000 [15:21<2:13:06,  1.10s/it]

Episode 711/8000, real env return = -105.27


  9%|███▍                                  | 721/8000 [15:32<2:21:50,  1.17s/it]

Episode 721/8000, real env return = -105.37


  9%|███▍                                  | 731/8000 [15:44<2:25:07,  1.20s/it]

Episode 731/8000, real env return = -104.40


  9%|███▌                                  | 741/8000 [15:55<2:13:42,  1.11s/it]

Episode 741/8000, real env return = -104.72


  9%|███▌                                  | 751/8000 [16:07<2:23:15,  1.19s/it]

Episode 751/8000, real env return = -102.92


 10%|███▌                                  | 761/8000 [16:18<2:15:13,  1.12s/it]

Episode 761/8000, real env return = -102.65


 10%|███▋                                  | 771/8000 [16:30<2:27:08,  1.22s/it]

Episode 771/8000, real env return = -104.29


 10%|███▋                                  | 781/8000 [16:42<2:22:48,  1.19s/it]

Episode 781/8000, real env return = -106.35


 10%|███▊                                  | 791/8000 [16:53<2:09:21,  1.08s/it]

Episode 791/8000, real env return = -104.29


 10%|███▊                                  | 801/8000 [17:05<2:19:49,  1.17s/it]

Episode 801/8000, real env return = -103.92


 10%|███▊                                  | 811/8000 [17:16<2:20:38,  1.17s/it]

Episode 811/8000, real env return = -108.04


 10%|███▉                                  | 821/8000 [17:27<2:10:39,  1.09s/it]

Episode 821/8000, real env return = -106.41


 10%|███▉                                  | 831/8000 [17:39<2:17:13,  1.15s/it]

Episode 831/8000, real env return = -106.67


 11%|███▉                                  | 841/8000 [17:50<2:14:06,  1.12s/it]

Episode 841/8000, real env return = -114.35


 11%|████                                  | 851/8000 [18:01<2:09:57,  1.09s/it]

Episode 851/8000, real env return = -108.58


 11%|████                                  | 861/8000 [18:13<2:19:58,  1.18s/it]

Episode 861/8000, real env return = -111.46


 11%|████▏                                 | 871/8000 [18:24<2:12:32,  1.12s/it]

Episode 871/8000, real env return = -110.50


 11%|████▏                                 | 881/8000 [18:36<2:21:38,  1.19s/it]

Episode 881/8000, real env return = -108.81


 11%|████▏                                 | 891/8000 [18:48<2:16:48,  1.15s/it]

Episode 891/8000, real env return = -114.50


 11%|████▎                                 | 901/8000 [18:59<2:11:17,  1.11s/it]

Episode 901/8000, real env return = -109.00


 11%|████▎                                 | 911/8000 [19:11<2:23:22,  1.21s/it]

Episode 911/8000, real env return = -110.05


 12%|████▎                                 | 921/8000 [19:22<2:10:29,  1.11s/it]

Episode 921/8000, real env return = -109.72


 12%|████▍                                 | 931/8000 [19:34<2:17:49,  1.17s/it]

Episode 931/8000, real env return = -108.32


 12%|████▍                                 | 941/8000 [19:46<2:12:32,  1.13s/it]

Episode 941/8000, real env return = -104.39


 12%|████▌                                 | 951/8000 [19:57<2:18:21,  1.18s/it]

Episode 951/8000, real env return = -104.80


 12%|████▌                                 | 961/8000 [20:09<2:11:40,  1.12s/it]

Episode 961/8000, real env return = -107.63


 12%|████▌                                 | 971/8000 [20:20<2:13:49,  1.14s/it]

Episode 971/8000, real env return = -110.60


 12%|████▋                                 | 981/8000 [20:31<2:09:25,  1.11s/it]

Episode 981/8000, real env return = -107.78


 12%|████▋                                 | 991/8000 [20:43<2:22:19,  1.22s/it]

Episode 991/8000, real env return = -106.95


 13%|████▋                                | 1001/8000 [20:55<2:16:22,  1.17s/it]

Episode 1001/8000, real env return = -106.76


 13%|████▋                                | 1011/8000 [21:06<2:08:40,  1.10s/it]

Episode 1011/8000, real env return = -105.64


 13%|████▋                                | 1021/8000 [21:18<2:16:08,  1.17s/it]

Episode 1021/8000, real env return = -108.71


 13%|████▊                                | 1031/8000 [21:29<2:09:13,  1.11s/it]

Episode 1031/8000, real env return = -104.34


 13%|████▊                                | 1041/8000 [21:41<2:18:47,  1.20s/it]

Episode 1041/8000, real env return = -105.46


 13%|████▊                                | 1051/8000 [21:52<2:14:43,  1.16s/it]

Episode 1051/8000, real env return = -105.23


 13%|████▉                                | 1061/8000 [22:04<2:07:27,  1.10s/it]

Episode 1061/8000, real env return = -110.38


 13%|████▉                                | 1071/8000 [22:16<2:13:58,  1.16s/it]

Episode 1071/8000, real env return = -111.60


 14%|████▉                                | 1081/8000 [22:27<2:13:53,  1.16s/it]

Episode 1081/8000, real env return = -110.12


 14%|█████                                | 1091/8000 [22:39<2:07:57,  1.11s/it]

Episode 1091/8000, real env return = -110.01


 14%|█████                                | 1101/8000 [22:50<2:09:30,  1.13s/it]

Episode 1101/8000, real env return = -106.67


 14%|█████▏                               | 1111/8000 [23:01<2:11:15,  1.14s/it]

Episode 1111/8000, real env return = -106.79


 14%|█████▏                               | 1121/8000 [23:13<2:09:24,  1.13s/it]

Episode 1121/8000, real env return = -111.20


 14%|█████▏                               | 1131/8000 [23:24<2:16:39,  1.19s/it]

Episode 1131/8000, real env return = -104.68


 14%|█████▎                               | 1141/8000 [23:36<2:06:48,  1.11s/it]

Episode 1141/8000, real env return = -110.89


 14%|█████▎                               | 1151/8000 [23:48<2:17:48,  1.21s/it]

Episode 1151/8000, real env return = -109.79


 15%|█████▎                               | 1161/8000 [23:59<2:18:10,  1.21s/it]

Episode 1161/8000, real env return = -110.05


 15%|█████▍                               | 1171/8000 [24:11<2:08:13,  1.13s/it]

Episode 1171/8000, real env return = -106.07


 15%|█████▍                               | 1181/8000 [24:23<2:17:52,  1.21s/it]

Episode 1181/8000, real env return = -110.56


 15%|█████▌                               | 1191/8000 [24:34<2:07:27,  1.12s/it]

Episode 1191/8000, real env return = -107.26


 15%|█████▌                               | 1201/8000 [24:46<2:11:47,  1.16s/it]

Episode 1201/8000, real env return = -111.09


 15%|█████▌                               | 1211/8000 [24:57<2:12:44,  1.17s/it]

Episode 1211/8000, real env return = -106.47


 15%|█████▋                               | 1221/8000 [25:09<2:05:07,  1.11s/it]

Episode 1221/8000, real env return = -106.09


 15%|█████▋                               | 1231/8000 [25:21<2:13:18,  1.18s/it]

Episode 1231/8000, real env return = -103.68


 16%|█████▋                               | 1241/8000 [25:33<2:19:17,  1.24s/it]

Episode 1241/8000, real env return = -104.47


 16%|█████▊                               | 1251/8000 [25:44<2:07:21,  1.13s/it]

Episode 1251/8000, real env return = -105.42


 16%|█████▊                               | 1261/8000 [25:56<2:14:33,  1.20s/it]

Episode 1261/8000, real env return = -109.66


 16%|█████▉                               | 1271/8000 [26:07<2:07:40,  1.14s/it]

Episode 1271/8000, real env return = -110.81


 16%|█████▉                               | 1281/8000 [26:19<2:04:31,  1.11s/it]

Episode 1281/8000, real env return = -108.81


 16%|█████▉                               | 1291/8000 [26:31<2:12:36,  1.19s/it]

Episode 1291/8000, real env return = -109.56


 16%|██████                               | 1301/8000 [26:42<2:00:19,  1.08s/it]

Episode 1301/8000, real env return = -111.25


 16%|██████                               | 1311/8000 [26:54<2:16:24,  1.22s/it]

Episode 1311/8000, real env return = -100.06


 17%|██████                               | 1321/8000 [27:05<2:04:49,  1.12s/it]

Episode 1321/8000, real env return = -104.70


 17%|██████▏                              | 1331/8000 [27:16<2:04:57,  1.12s/it]

Episode 1331/8000, real env return = -106.47


 17%|██████▏                              | 1341/8000 [27:28<2:09:27,  1.17s/it]

Episode 1341/8000, real env return = -108.86


 17%|██████▏                              | 1351/8000 [27:39<2:00:35,  1.09s/it]

Episode 1351/8000, real env return = -108.89


 17%|██████▎                              | 1361/8000 [27:51<2:08:03,  1.16s/it]

Episode 1361/8000, real env return = -110.48


 17%|██████▎                              | 1371/8000 [28:02<2:02:20,  1.11s/it]

Episode 1371/8000, real env return = -109.14


 17%|██████▍                              | 1381/8000 [28:13<2:05:28,  1.14s/it]

Episode 1381/8000, real env return = -108.10


 17%|██████▍                              | 1391/8000 [28:25<2:05:33,  1.14s/it]

Episode 1391/8000, real env return = -110.86


 18%|██████▍                              | 1401/8000 [28:36<2:06:42,  1.15s/it]

Episode 1401/8000, real env return = -109.75


 18%|██████▌                              | 1411/8000 [28:48<2:01:18,  1.10s/it]

Episode 1411/8000, real env return = -108.14


 18%|██████▌                              | 1421/8000 [28:59<2:09:35,  1.18s/it]

Episode 1421/8000, real env return = -109.63


 18%|██████▌                              | 1431/8000 [29:11<2:08:58,  1.18s/it]

Episode 1431/8000, real env return = -108.80


 18%|██████▋                              | 1441/8000 [29:22<2:05:36,  1.15s/it]

Episode 1441/8000, real env return = -110.27


 18%|██████▋                              | 1451/8000 [29:34<2:09:28,  1.19s/it]

Episode 1451/8000, real env return = -109.06


 18%|██████▊                              | 1461/8000 [29:45<1:59:58,  1.10s/it]

Episode 1461/8000, real env return = -104.33


 18%|██████▊                              | 1471/8000 [29:56<2:05:13,  1.15s/it]

Episode 1471/8000, real env return = -107.22


 19%|██████▊                              | 1481/8000 [30:08<2:04:31,  1.15s/it]

Episode 1481/8000, real env return = -108.76


 19%|██████▉                              | 1491/8000 [30:18<1:55:16,  1.06s/it]

Episode 1491/8000, real env return = -109.79


 19%|██████▉                              | 1501/8000 [30:30<2:08:46,  1.19s/it]

Episode 1501/8000, real env return = -110.19


 19%|██████▉                              | 1511/8000 [30:41<1:55:02,  1.06s/it]

Episode 1511/8000, real env return = -108.45


 19%|███████                              | 1521/8000 [30:53<1:58:30,  1.10s/it]

Episode 1521/8000, real env return = -109.04


 19%|███████                              | 1531/8000 [31:04<2:00:28,  1.12s/it]

Episode 1531/8000, real env return = -110.48


 19%|███████▏                             | 1541/8000 [31:15<1:54:18,  1.06s/it]

Episode 1541/8000, real env return = -108.54


 19%|███████▏                             | 1551/8000 [31:27<2:09:54,  1.21s/it]

Episode 1551/8000, real env return = -108.26


 20%|███████▏                             | 1561/8000 [31:38<1:58:32,  1.10s/it]

Episode 1561/8000, real env return = -109.97


 20%|███████▎                             | 1571/8000 [31:49<2:00:56,  1.13s/it]

Episode 1571/8000, real env return = -110.14


 20%|███████▎                             | 1581/8000 [32:00<2:04:06,  1.16s/it]

Episode 1581/8000, real env return = -109.24


 20%|███████▎                             | 1591/8000 [32:11<1:54:55,  1.08s/it]

Episode 1591/8000, real env return = -109.28


 20%|███████▍                             | 1601/8000 [32:23<2:07:35,  1.20s/it]

Episode 1601/8000, real env return = -109.16


 20%|███████▍                             | 1611/8000 [32:34<2:01:20,  1.14s/it]

Episode 1611/8000, real env return = -110.09


 20%|███████▍                             | 1621/8000 [32:46<2:05:45,  1.18s/it]

Episode 1621/8000, real env return = -108.56


 20%|███████▌                             | 1631/8000 [32:57<1:58:34,  1.12s/it]

Episode 1631/8000, real env return = -110.34


 21%|███████▌                             | 1641/8000 [33:08<1:51:57,  1.06s/it]

Episode 1641/8000, real env return = -110.29


 21%|███████▋                             | 1651/8000 [33:20<2:06:57,  1.20s/it]

Episode 1651/8000, real env return = -108.67


 21%|███████▋                             | 1661/8000 [33:31<1:59:32,  1.13s/it]

Episode 1661/8000, real env return = -109.91


 21%|███████▋                             | 1671/8000 [33:43<2:00:41,  1.14s/it]

Episode 1671/8000, real env return = -110.75


 21%|███████▊                             | 1681/8000 [33:55<2:03:12,  1.17s/it]

Episode 1681/8000, real env return = -109.00


 21%|███████▊                             | 1691/8000 [34:06<2:01:00,  1.15s/it]

Episode 1691/8000, real env return = -109.33


 21%|███████▊                             | 1701/8000 [34:17<1:57:07,  1.12s/it]

Episode 1701/8000, real env return = -109.45


 21%|███████▉                             | 1711/8000 [34:29<2:03:32,  1.18s/it]

Episode 1711/8000, real env return = -109.46


 22%|███████▉                             | 1721/8000 [34:40<2:08:51,  1.23s/it]

Episode 1721/8000, real env return = -108.32


 22%|████████                             | 1731/8000 [34:52<1:54:03,  1.09s/it]

Episode 1731/8000, real env return = -108.92


 22%|████████                             | 1741/8000 [35:03<1:58:40,  1.14s/it]

Episode 1741/8000, real env return = -108.26


 22%|████████                             | 1751/8000 [35:14<1:52:55,  1.08s/it]

Episode 1751/8000, real env return = -109.25


 22%|████████▏                            | 1761/8000 [35:26<2:00:20,  1.16s/it]

Episode 1761/8000, real env return = -110.43


 22%|████████▏                            | 1771/8000 [35:37<2:02:00,  1.18s/it]

Episode 1771/8000, real env return = -102.11


 22%|████████▏                            | 1781/8000 [35:49<1:51:40,  1.08s/it]

Episode 1781/8000, real env return = -110.55


 22%|████████▎                            | 1791/8000 [36:01<2:07:01,  1.23s/it]

Episode 1791/8000, real env return = -110.77


 23%|████████▎                            | 1801/8000 [36:12<1:52:28,  1.09s/it]

Episode 1801/8000, real env return = -109.18


 23%|████████▍                            | 1811/8000 [36:24<2:01:13,  1.18s/it]

Episode 1811/8000, real env return = -109.35


 23%|████████▍                            | 1821/8000 [36:35<1:57:15,  1.14s/it]

Episode 1821/8000, real env return = -108.88


 23%|████████▍                            | 1831/8000 [36:46<1:51:08,  1.08s/it]

Episode 1831/8000, real env return = -109.44


 23%|████████▌                            | 1841/8000 [36:58<2:00:33,  1.17s/it]

Episode 1841/8000, real env return = -108.87


 23%|████████▌                            | 1851/8000 [37:09<1:53:57,  1.11s/it]

Episode 1851/8000, real env return = -109.23


 23%|████████▌                            | 1861/8000 [37:21<1:59:18,  1.17s/it]

Episode 1861/8000, real env return = -106.51


 23%|████████▋                            | 1871/8000 [37:32<1:55:33,  1.13s/it]

Episode 1871/8000, real env return = -106.96


 24%|████████▋                            | 1881/8000 [37:43<1:54:24,  1.12s/it]

Episode 1881/8000, real env return = -107.02


 24%|████████▋                            | 1891/8000 [37:55<1:54:48,  1.13s/it]

Episode 1891/8000, real env return = -110.91


 24%|████████▊                            | 1901/8000 [38:06<1:58:01,  1.16s/it]

Episode 1901/8000, real env return = -109.76


 24%|████████▊                            | 1911/8000 [38:18<1:55:01,  1.13s/it]

Episode 1911/8000, real env return = -109.04


 24%|████████▉                            | 1921/8000 [38:29<1:59:41,  1.18s/it]

Episode 1921/8000, real env return = -108.82


 24%|████████▉                            | 1931/8000 [38:41<1:57:12,  1.16s/it]

Episode 1931/8000, real env return = -108.78


 24%|████████▉                            | 1941/8000 [38:52<1:52:57,  1.12s/it]

Episode 1941/8000, real env return = -109.57


 24%|█████████                            | 1951/8000 [39:03<1:57:59,  1.17s/it]

Episode 1951/8000, real env return = -107.65


 25%|█████████                            | 1961/8000 [39:14<1:50:01,  1.09s/it]

Episode 1961/8000, real env return = -106.46


 25%|█████████                            | 1971/8000 [39:26<1:50:56,  1.10s/it]

Episode 1971/8000, real env return = -108.63


 25%|█████████▏                           | 1981/8000 [39:37<1:57:55,  1.18s/it]

Episode 1981/8000, real env return = -109.45


 25%|█████████▏                           | 1991/8000 [39:48<1:52:11,  1.12s/it]

Episode 1991/8000, real env return = -108.07


 25%|█████████▎                           | 2001/8000 [40:00<1:57:15,  1.17s/it]

Episode 2001/8000, real env return = -111.02


 25%|█████████▎                           | 2011/8000 [40:10<1:51:30,  1.12s/it]

Episode 2011/8000, real env return = -110.54


 25%|█████████▎                           | 2021/8000 [40:22<1:47:20,  1.08s/it]

Episode 2021/8000, real env return = -110.40


 25%|█████████▍                           | 2031/8000 [40:33<1:58:42,  1.19s/it]

Episode 2031/8000, real env return = -110.16


 26%|█████████▍                           | 2041/8000 [40:44<1:46:03,  1.07s/it]

Episode 2041/8000, real env return = -109.03


 26%|█████████▍                           | 2051/8000 [40:56<1:50:09,  1.11s/it]

Episode 2051/8000, real env return = -107.05


 26%|█████████▌                           | 2061/8000 [41:07<1:48:57,  1.10s/it]

Episode 2061/8000, real env return = -109.22


 26%|█████████▌                           | 2071/8000 [41:18<1:49:20,  1.11s/it]

Episode 2071/8000, real env return = -107.92


 26%|█████████▌                           | 2081/8000 [41:30<1:57:48,  1.19s/it]

Episode 2081/8000, real env return = -109.28


 26%|█████████▋                           | 2091/8000 [41:41<1:51:05,  1.13s/it]

Episode 2091/8000, real env return = -106.48


 26%|█████████▋                           | 2101/8000 [41:53<1:53:53,  1.16s/it]

Episode 2101/8000, real env return = -105.38


 26%|█████████▊                           | 2111/8000 [42:04<1:51:23,  1.13s/it]

Episode 2111/8000, real env return = -110.92


 27%|█████████▊                           | 2121/8000 [42:16<1:54:10,  1.17s/it]

Episode 2121/8000, real env return = -131.30


 27%|█████████▊                           | 2131/8000 [42:28<1:55:57,  1.19s/it]

Episode 2131/8000, real env return = -101.61


 27%|█████████▉                           | 2141/8000 [42:39<1:54:04,  1.17s/it]

Episode 2141/8000, real env return = -122.00


 27%|█████████▉                           | 2151/8000 [42:51<1:50:44,  1.14s/it]

Episode 2151/8000, real env return = -119.33


 27%|█████████▉                           | 2161/8000 [43:03<1:51:44,  1.15s/it]

Episode 2161/8000, real env return = -116.12


 27%|██████████                           | 2171/8000 [43:14<1:49:43,  1.13s/it]

Episode 2171/8000, real env return = -103.66


 27%|██████████                           | 2181/8000 [43:26<1:47:18,  1.11s/it]

Episode 2181/8000, real env return = -116.36


 27%|██████████▏                          | 2191/8000 [43:37<1:59:49,  1.24s/it]

Episode 2191/8000, real env return = -118.09


 28%|██████████▏                          | 2201/8000 [43:49<1:45:43,  1.09s/it]

Episode 2201/8000, real env return = -116.00


 28%|██████████▏                          | 2211/8000 [44:01<2:03:10,  1.28s/it]

Episode 2211/8000, real env return = -109.51


 28%|██████████▎                          | 2221/8000 [44:12<1:51:05,  1.15s/it]

Episode 2221/8000, real env return = -115.29


 28%|██████████▎                          | 2231/8000 [44:24<1:48:07,  1.12s/it]

Episode 2231/8000, real env return = -113.60


 28%|██████████▎                          | 2241/8000 [44:35<1:52:22,  1.17s/it]

Episode 2241/8000, real env return = -113.64


 28%|██████████▍                          | 2251/8000 [44:47<1:48:49,  1.14s/it]

Episode 2251/8000, real env return = -101.90


 28%|██████████▍                          | 2261/8000 [44:59<1:48:06,  1.13s/it]

Episode 2261/8000, real env return = -113.54


 28%|██████████▌                          | 2271/8000 [45:10<1:49:21,  1.15s/it]

Episode 2271/8000, real env return = -112.45


 29%|██████████▌                          | 2281/8000 [45:22<1:49:22,  1.15s/it]

Episode 2281/8000, real env return = -114.96


 29%|██████████▌                          | 2291/8000 [45:34<1:48:09,  1.14s/it]

Episode 2291/8000, real env return = -100.86


 29%|██████████▋                          | 2301/8000 [45:45<1:56:17,  1.22s/it]

Episode 2301/8000, real env return = -118.51


 29%|██████████▋                          | 2311/8000 [45:57<1:46:39,  1.12s/it]

Episode 2311/8000, real env return = -113.33


 29%|██████████▋                          | 2321/8000 [46:08<1:56:38,  1.23s/it]

Episode 2321/8000, real env return = -114.69


 29%|██████████▊                          | 2331/8000 [46:20<1:50:19,  1.17s/it]

Episode 2331/8000, real env return = -102.19


 29%|██████████▊                          | 2341/8000 [46:31<1:46:19,  1.13s/it]

Episode 2341/8000, real env return = -112.65


 29%|██████████▊                          | 2351/8000 [46:43<1:49:36,  1.16s/it]

Episode 2351/8000, real env return = -118.09


 30%|██████████▉                          | 2361/8000 [46:54<1:45:32,  1.12s/it]

Episode 2361/8000, real env return = -114.19


 30%|██████████▉                          | 2371/8000 [47:07<1:54:34,  1.22s/it]

Episode 2371/8000, real env return = -101.17


 30%|███████████                          | 2381/8000 [47:18<1:53:31,  1.21s/it]

Episode 2381/8000, real env return = -100.94


 30%|███████████                          | 2391/8000 [47:29<1:52:24,  1.20s/it]

Episode 2391/8000, real env return = -112.48


 30%|███████████                          | 2401/8000 [47:41<1:49:56,  1.18s/it]

Episode 2401/8000, real env return = -114.22


 30%|███████████▏                         | 2411/8000 [47:52<1:50:48,  1.19s/it]

Episode 2411/8000, real env return = -99.92


 30%|███████████▏                         | 2421/8000 [48:04<1:43:40,  1.12s/it]

Episode 2421/8000, real env return = -100.86


 30%|███████████▏                         | 2431/8000 [48:15<1:44:21,  1.12s/it]

Episode 2431/8000, real env return = -113.69


 31%|███████████▎                         | 2441/8000 [48:27<1:43:02,  1.11s/it]

Episode 2441/8000, real env return = -113.60


 31%|███████████▎                         | 2451/8000 [48:38<1:46:19,  1.15s/it]

Episode 2451/8000, real env return = -101.30


 31%|███████████▍                         | 2461/8000 [48:50<1:53:35,  1.23s/it]

Episode 2461/8000, real env return = -101.21


 31%|███████████▍                         | 2471/8000 [49:02<1:44:18,  1.13s/it]

Episode 2471/8000, real env return = -113.39


 31%|███████████▍                         | 2481/8000 [49:14<1:53:31,  1.23s/it]

Episode 2481/8000, real env return = -115.46


 31%|███████████▌                         | 2491/8000 [49:25<1:47:47,  1.17s/it]

Episode 2491/8000, real env return = -101.71


 31%|███████████▌                         | 2501/8000 [49:36<1:43:18,  1.13s/it]

Episode 2501/8000, real env return = -101.18


 31%|███████████▌                         | 2511/8000 [49:48<1:47:56,  1.18s/it]

Episode 2511/8000, real env return = -113.66


 32%|███████████▋                         | 2521/8000 [49:59<1:41:47,  1.11s/it]

Episode 2521/8000, real env return = -113.20


 32%|███████████▋                         | 2531/8000 [50:11<1:43:09,  1.13s/it]

Episode 2531/8000, real env return = -101.47


 32%|███████████▊                         | 2541/8000 [50:23<1:45:56,  1.16s/it]

Episode 2541/8000, real env return = -101.06


 32%|███████████▊                         | 2551/8000 [50:34<1:49:13,  1.20s/it]

Episode 2551/8000, real env return = -114.44


 32%|███████████▊                         | 2561/8000 [50:46<1:42:17,  1.13s/it]

Episode 2561/8000, real env return = -113.80


 32%|███████████▉                         | 2571/8000 [50:57<1:50:28,  1.22s/it]

Episode 2571/8000, real env return = -100.42


 32%|███████████▉                         | 2581/8000 [51:09<1:38:58,  1.10s/it]

Episode 2581/8000, real env return = -100.82


 32%|███████████▉                         | 2591/8000 [51:20<1:43:49,  1.15s/it]

Episode 2591/8000, real env return = -112.25


 33%|████████████                         | 2601/8000 [51:31<1:38:56,  1.10s/it]

Episode 2601/8000, real env return = -113.03


 33%|████████████                         | 2611/8000 [51:43<1:44:49,  1.17s/it]

Episode 2611/8000, real env return = -100.76


 33%|████████████                         | 2621/8000 [51:54<1:46:07,  1.18s/it]

Episode 2621/8000, real env return = -100.97


 33%|████████████▏                        | 2631/8000 [52:06<1:46:46,  1.19s/it]

Episode 2631/8000, real env return = -115.46


 33%|████████████▏                        | 2641/8000 [52:17<1:37:05,  1.09s/it]

Episode 2641/8000, real env return = -111.64


 33%|████████████▎                        | 2651/8000 [52:29<1:49:11,  1.22s/it]

Episode 2651/8000, real env return = -100.49


 33%|████████████▎                        | 2661/8000 [52:41<1:38:38,  1.11s/it]

Episode 2661/8000, real env return = -100.97


 33%|████████████▎                        | 2671/8000 [52:53<1:39:25,  1.12s/it]

Episode 2671/8000, real env return = -113.49


 34%|████████████▍                        | 2681/8000 [53:04<1:42:25,  1.16s/it]

Episode 2681/8000, real env return = -112.55


 34%|████████████▍                        | 2691/8000 [53:16<1:44:08,  1.18s/it]

Episode 2691/8000, real env return = -100.15


 34%|████████████▍                        | 2701/8000 [53:27<1:46:06,  1.20s/it]

Episode 2701/8000, real env return = -99.61


 34%|████████████▌                        | 2711/8000 [53:39<1:38:54,  1.12s/it]

Episode 2711/8000, real env return = -111.90


 34%|████████████▌                        | 2721/8000 [53:50<1:42:39,  1.17s/it]

Episode 2721/8000, real env return = -112.22


 34%|████████████▋                        | 2731/8000 [54:02<1:41:10,  1.15s/it]

Episode 2731/8000, real env return = -99.58


 34%|████████████▋                        | 2741/8000 [54:14<1:42:54,  1.17s/it]

Episode 2741/8000, real env return = -99.72


 34%|████████████▋                        | 2751/8000 [54:25<1:37:59,  1.12s/it]

Episode 2751/8000, real env return = -112.07


 35%|████████████▊                        | 2761/8000 [54:37<1:44:31,  1.20s/it]

Episode 2761/8000, real env return = -113.83


 35%|████████████▊                        | 2771/8000 [54:48<1:36:18,  1.11s/it]

Episode 2771/8000, real env return = -99.98


 35%|████████████▊                        | 2781/8000 [55:00<1:36:42,  1.11s/it]

Episode 2781/8000, real env return = -99.83


 35%|████████████▉                        | 2791/8000 [55:11<1:40:16,  1.16s/it]

Episode 2791/8000, real env return = -99.76


 35%|████████████▉                        | 2801/8000 [55:23<1:33:10,  1.08s/it]

Episode 2801/8000, real env return = -112.75


 35%|█████████████                        | 2811/8000 [55:35<1:45:11,  1.22s/it]

Episode 2811/8000, real env return = -99.66


 35%|█████████████                        | 2821/8000 [55:46<1:38:16,  1.14s/it]

Episode 2821/8000, real env return = -99.30


 35%|█████████████                        | 2831/8000 [55:58<1:39:59,  1.16s/it]

Episode 2831/8000, real env return = -100.62


 36%|█████████████▏                       | 2841/8000 [56:09<1:40:20,  1.17s/it]

Episode 2841/8000, real env return = -111.96


 36%|█████████████▏                       | 2851/8000 [56:20<1:31:35,  1.07s/it]

Episode 2851/8000, real env return = -102.02


 36%|█████████████▏                       | 2861/8000 [56:32<1:37:45,  1.14s/it]

Episode 2861/8000, real env return = -101.12


 36%|█████████████▎                       | 2871/8000 [56:44<1:39:49,  1.17s/it]

Episode 2871/8000, real env return = -102.35


 36%|█████████████▎                       | 2881/8000 [56:55<1:33:42,  1.10s/it]

Episode 2881/8000, real env return = -112.49


 36%|█████████████▎                       | 2891/8000 [57:07<1:37:39,  1.15s/it]

Episode 2891/8000, real env return = -100.04


 36%|█████████████▍                       | 2901/8000 [57:18<1:38:30,  1.16s/it]

Episode 2901/8000, real env return = -99.53


 36%|█████████████▍                       | 2911/8000 [57:29<1:33:56,  1.11s/it]

Episode 2911/8000, real env return = -100.39


 37%|█████████████▌                       | 2921/8000 [57:41<1:35:31,  1.13s/it]

Episode 2921/8000, real env return = -111.65


 37%|█████████████▌                       | 2931/8000 [57:52<1:31:11,  1.08s/it]

Episode 2931/8000, real env return = -100.09


 37%|█████████████▌                       | 2941/8000 [58:04<1:39:46,  1.18s/it]

Episode 2941/8000, real env return = -100.18


 37%|█████████████▋                       | 2951/8000 [58:15<1:39:58,  1.19s/it]

Episode 2951/8000, real env return = -100.18


 37%|█████████████▋                       | 2961/8000 [58:27<1:36:39,  1.15s/it]

Episode 2961/8000, real env return = -113.90


 37%|█████████████▋                       | 2971/8000 [58:39<1:38:04,  1.17s/it]

Episode 2971/8000, real env return = -99.86


 37%|█████████████▊                       | 2981/8000 [58:50<1:38:26,  1.18s/it]

Episode 2981/8000, real env return = -99.48


 37%|█████████████▊                       | 2991/8000 [59:02<1:33:42,  1.12s/it]

Episode 2991/8000, real env return = -99.75


 38%|█████████████▉                       | 3001/8000 [59:14<1:41:04,  1.21s/it]

Episode 3001/8000, real env return = -113.44


 38%|█████████████▉                       | 3011/8000 [59:25<1:36:05,  1.16s/it]

Episode 3011/8000, real env return = -99.30


 38%|█████████████▉                       | 3021/8000 [59:37<1:39:29,  1.20s/it]

Episode 3021/8000, real env return = -99.72


 38%|██████████████                       | 3031/8000 [59:49<1:40:30,  1.21s/it]

Episode 3031/8000, real env return = -99.25


 38%|█████████████▎                     | 3041/8000 [1:00:01<1:33:24,  1.13s/it]

Episode 3041/8000, real env return = -112.01


 38%|█████████████▎                     | 3051/8000 [1:00:13<1:40:02,  1.21s/it]

Episode 3051/8000, real env return = -100.90


 38%|█████████████▍                     | 3061/8000 [1:00:24<1:33:11,  1.13s/it]

Episode 3061/8000, real env return = -101.49


 38%|█████████████▍                     | 3071/8000 [1:00:36<1:34:21,  1.15s/it]

Episode 3071/8000, real env return = -101.41


 39%|█████████████▍                     | 3081/8000 [1:00:47<1:34:15,  1.15s/it]

Episode 3081/8000, real env return = -112.62


 39%|█████████████▌                     | 3091/8000 [1:00:59<1:37:36,  1.19s/it]

Episode 3091/8000, real env return = -102.63


 39%|█████████████▌                     | 3101/8000 [1:01:10<1:30:56,  1.11s/it]

Episode 3101/8000, real env return = -101.36


 39%|█████████████▌                     | 3111/8000 [1:01:22<1:36:38,  1.19s/it]

Episode 3111/8000, real env return = -102.59


 39%|█████████████▋                     | 3121/8000 [1:01:34<1:36:05,  1.18s/it]

Episode 3121/8000, real env return = -114.34


 39%|█████████████▋                     | 3131/8000 [1:01:46<1:36:42,  1.19s/it]

Episode 3131/8000, real env return = -102.78


 39%|█████████████▋                     | 3141/8000 [1:01:57<1:32:30,  1.14s/it]

Episode 3141/8000, real env return = -102.26


 39%|█████████████▊                     | 3151/8000 [1:02:08<1:30:41,  1.12s/it]

Episode 3151/8000, real env return = -99.85


 40%|█████████████▊                     | 3161/8000 [1:02:20<1:31:21,  1.13s/it]

Episode 3161/8000, real env return = -112.95


 40%|█████████████▊                     | 3171/8000 [1:02:31<1:29:37,  1.11s/it]

Episode 3171/8000, real env return = -101.57


 40%|█████████████▉                     | 3181/8000 [1:02:43<1:33:43,  1.17s/it]

Episode 3181/8000, real env return = -102.43


 40%|█████████████▉                     | 3191/8000 [1:02:54<1:32:59,  1.16s/it]

Episode 3191/8000, real env return = -102.64


 40%|██████████████                     | 3201/8000 [1:03:05<1:27:51,  1.10s/it]

Episode 3201/8000, real env return = -102.78


 40%|██████████████                     | 3211/8000 [1:03:17<1:33:03,  1.17s/it]

Episode 3211/8000, real env return = -102.78


 40%|██████████████                     | 3221/8000 [1:03:29<1:31:48,  1.15s/it]

Episode 3221/8000, real env return = -102.48


 40%|██████████████▏                    | 3231/8000 [1:03:40<1:28:52,  1.12s/it]

Episode 3231/8000, real env return = -102.56


 41%|██████████████▏                    | 3241/8000 [1:03:52<1:35:25,  1.20s/it]

Episode 3241/8000, real env return = -102.85


 41%|██████████████▏                    | 3251/8000 [1:04:04<1:30:23,  1.14s/it]

Episode 3251/8000, real env return = -112.17


 41%|██████████████▎                    | 3261/8000 [1:04:16<1:36:15,  1.22s/it]

Episode 3261/8000, real env return = -111.48


 41%|██████████████▎                    | 3271/8000 [1:04:27<1:31:54,  1.17s/it]

Episode 3271/8000, real env return = -117.20


 41%|██████████████▎                    | 3281/8000 [1:04:39<1:35:32,  1.21s/it]

Episode 3281/8000, real env return = -112.84


 41%|██████████████▍                    | 3291/8000 [1:04:51<1:31:53,  1.17s/it]

Episode 3291/8000, real env return = -112.74


 41%|██████████████▍                    | 3301/8000 [1:05:02<1:28:09,  1.13s/it]

Episode 3301/8000, real env return = -112.48


 41%|██████████████▍                    | 3311/8000 [1:05:13<1:27:56,  1.13s/it]

Episode 3311/8000, real env return = -112.98


 42%|██████████████▌                    | 3321/8000 [1:05:25<1:30:42,  1.16s/it]

Episode 3321/8000, real env return = -114.97


 42%|██████████████▌                    | 3331/8000 [1:05:37<1:29:55,  1.16s/it]

Episode 3331/8000, real env return = -112.88


 42%|██████████████▌                    | 3341/8000 [1:05:48<1:28:46,  1.14s/it]

Episode 3341/8000, real env return = -112.86


 42%|██████████████▋                    | 3351/8000 [1:06:00<1:32:35,  1.19s/it]

Episode 3351/8000, real env return = -115.57


 42%|██████████████▋                    | 3361/8000 [1:06:11<1:26:49,  1.12s/it]

Episode 3361/8000, real env return = -112.56


 42%|██████████████▋                    | 3371/8000 [1:06:23<1:32:40,  1.20s/it]

Episode 3371/8000, real env return = -112.31


 42%|██████████████▊                    | 3381/8000 [1:06:35<1:29:50,  1.17s/it]

Episode 3381/8000, real env return = -112.51


 42%|██████████████▊                    | 3391/8000 [1:06:47<1:30:16,  1.18s/it]

Episode 3391/8000, real env return = -112.59


 43%|██████████████▉                    | 3401/8000 [1:06:58<1:30:30,  1.18s/it]

Episode 3401/8000, real env return = -111.89


 43%|██████████████▉                    | 3411/8000 [1:07:09<1:24:01,  1.10s/it]

Episode 3411/8000, real env return = -112.40


 43%|██████████████▉                    | 3421/8000 [1:07:21<1:30:46,  1.19s/it]

Episode 3421/8000, real env return = -112.54


 43%|███████████████                    | 3431/8000 [1:07:32<1:26:57,  1.14s/it]

Episode 3431/8000, real env return = -112.49


 43%|███████████████                    | 3441/8000 [1:07:44<1:24:02,  1.11s/it]

Episode 3441/8000, real env return = -112.28


 43%|███████████████                    | 3451/8000 [1:07:55<1:28:25,  1.17s/it]

Episode 3451/8000, real env return = -112.77


 43%|███████████████▏                   | 3461/8000 [1:08:06<1:21:16,  1.07s/it]

Episode 3461/8000, real env return = -112.41


 43%|███████████████▏                   | 3471/8000 [1:08:18<1:29:35,  1.19s/it]

Episode 3471/8000, real env return = -112.09


 44%|███████████████▏                   | 3481/8000 [1:08:29<1:24:53,  1.13s/it]

Episode 3481/8000, real env return = -112.64


 44%|███████████████▎                   | 3491/8000 [1:08:40<1:25:47,  1.14s/it]

Episode 3491/8000, real env return = -112.84


 44%|███████████████▎                   | 3501/8000 [1:08:52<1:28:33,  1.18s/it]

Episode 3501/8000, real env return = -112.77


 44%|███████████████▎                   | 3511/8000 [1:09:04<1:26:13,  1.15s/it]

Episode 3511/8000, real env return = -113.91


 44%|███████████████▍                   | 3521/8000 [1:09:15<1:23:58,  1.13s/it]

Episode 3521/8000, real env return = -112.69


 44%|███████████████▍                   | 3531/8000 [1:09:27<1:30:05,  1.21s/it]

Episode 3531/8000, real env return = -112.73


 44%|███████████████▍                   | 3541/8000 [1:09:38<1:20:30,  1.08s/it]

Episode 3541/8000, real env return = -112.80


 44%|███████████████▌                   | 3551/8000 [1:09:49<1:23:24,  1.12s/it]

Episode 3551/8000, real env return = -112.17


 45%|███████████████▌                   | 3561/8000 [1:10:01<1:25:44,  1.16s/it]

Episode 3561/8000, real env return = -111.79


 45%|███████████████▌                   | 3571/8000 [1:10:13<1:23:55,  1.14s/it]

Episode 3571/8000, real env return = -118.94


 45%|███████████████▋                   | 3581/8000 [1:10:24<1:25:19,  1.16s/it]

Episode 3581/8000, real env return = -112.28


 45%|███████████████▋                   | 3591/8000 [1:10:35<1:22:29,  1.12s/it]

Episode 3591/8000, real env return = -112.50


 45%|███████████████▊                   | 3601/8000 [1:10:47<1:27:00,  1.19s/it]

Episode 3601/8000, real env return = -111.94


 45%|███████████████▊                   | 3611/8000 [1:10:58<1:22:01,  1.12s/it]

Episode 3611/8000, real env return = -102.14


 45%|███████████████▊                   | 3621/8000 [1:11:09<1:19:29,  1.09s/it]

Episode 3621/8000, real env return = -113.44


 45%|███████████████▉                   | 3631/8000 [1:11:21<1:24:26,  1.16s/it]

Episode 3631/8000, real env return = -112.80


 46%|███████████████▉                   | 3641/8000 [1:11:32<1:20:03,  1.10s/it]

Episode 3641/8000, real env return = -113.43


 46%|███████████████▉                   | 3651/8000 [1:11:44<1:24:30,  1.17s/it]

Episode 3651/8000, real env return = -102.08


 46%|████████████████                   | 3661/8000 [1:11:56<1:22:34,  1.14s/it]

Episode 3661/8000, real env return = -112.93


 46%|████████████████                   | 3671/8000 [1:12:07<1:19:59,  1.11s/it]

Episode 3671/8000, real env return = -113.12


 46%|████████████████                   | 3681/8000 [1:12:18<1:27:32,  1.22s/it]

Episode 3681/8000, real env return = -112.60


 46%|████████████████▏                  | 3691/8000 [1:12:30<1:20:07,  1.12s/it]

Episode 3691/8000, real env return = -101.60


 46%|████████████████▏                  | 3701/8000 [1:12:42<1:27:34,  1.22s/it]

Episode 3701/8000, real env return = -112.91


 46%|████████████████▏                  | 3711/8000 [1:12:53<1:22:33,  1.15s/it]

Episode 3711/8000, real env return = -113.36


 47%|████████████████▎                  | 3721/8000 [1:13:05<1:20:40,  1.13s/it]

Episode 3721/8000, real env return = -110.45


 47%|████████████████▎                  | 3731/8000 [1:13:17<1:23:37,  1.18s/it]

Episode 3731/8000, real env return = -101.69


 47%|████████████████▎                  | 3741/8000 [1:13:28<1:21:58,  1.15s/it]

Episode 3741/8000, real env return = -112.41


 47%|████████████████▍                  | 3751/8000 [1:13:40<1:23:54,  1.18s/it]

Episode 3751/8000, real env return = -113.15


 47%|████████████████▍                  | 3761/8000 [1:13:51<1:21:37,  1.16s/it]

Episode 3761/8000, real env return = -110.69


 47%|████████████████▍                  | 3771/8000 [1:14:03<1:23:21,  1.18s/it]

Episode 3771/8000, real env return = -101.29


 47%|████████████████▌                  | 3781/8000 [1:14:14<1:19:11,  1.13s/it]

Episode 3781/8000, real env return = -111.79


 47%|████████████████▌                  | 3791/8000 [1:14:26<1:23:32,  1.19s/it]

Episode 3791/8000, real env return = -112.15


 48%|████████████████▋                  | 3801/8000 [1:14:38<1:17:21,  1.11s/it]

Episode 3801/8000, real env return = -111.15


 48%|████████████████▋                  | 3811/8000 [1:14:50<1:25:04,  1.22s/it]

Episode 3811/8000, real env return = -100.66


 48%|████████████████▋                  | 3821/8000 [1:15:01<1:20:09,  1.15s/it]

Episode 3821/8000, real env return = -111.92


 48%|████████████████▊                  | 3831/8000 [1:15:13<1:18:03,  1.12s/it]

Episode 3831/8000, real env return = -111.96


 48%|████████████████▊                  | 3841/8000 [1:15:25<1:20:32,  1.16s/it]

Episode 3841/8000, real env return = -112.60


 48%|████████████████▊                  | 3851/8000 [1:15:36<1:17:18,  1.12s/it]

Episode 3851/8000, real env return = -100.03


 48%|████████████████▉                  | 3861/8000 [1:15:48<1:21:31,  1.18s/it]

Episode 3861/8000, real env return = -110.13


 48%|████████████████▉                  | 3871/8000 [1:15:59<1:19:16,  1.15s/it]

Episode 3871/8000, real env return = -111.77


 49%|████████████████▉                  | 3881/8000 [1:16:10<1:16:51,  1.12s/it]

Episode 3881/8000, real env return = -111.71


 49%|█████████████████                  | 3891/8000 [1:16:22<1:21:55,  1.20s/it]

Episode 3891/8000, real env return = -100.98


 49%|█████████████████                  | 3901/8000 [1:16:33<1:16:04,  1.11s/it]

Episode 3901/8000, real env return = -111.43


 49%|█████████████████                  | 3911/8000 [1:16:45<1:16:07,  1.12s/it]

Episode 3911/8000, real env return = -112.26


 49%|█████████████████▏                 | 3921/8000 [1:16:56<1:17:07,  1.13s/it]

Episode 3921/8000, real env return = -111.93


 49%|█████████████████▏                 | 3931/8000 [1:17:08<1:19:36,  1.17s/it]

Episode 3931/8000, real env return = -100.84


 49%|█████████████████▏                 | 3941/8000 [1:17:20<1:20:36,  1.19s/it]

Episode 3941/8000, real env return = -111.51


 49%|█████████████████▎                 | 3951/8000 [1:17:31<1:21:17,  1.20s/it]

Episode 3951/8000, real env return = -112.49


 50%|█████████████████▎                 | 3961/8000 [1:17:43<1:14:06,  1.10s/it]

Episode 3961/8000, real env return = -111.72


 50%|█████████████████▎                 | 3971/8000 [1:17:55<1:25:50,  1.28s/it]

Episode 3971/8000, real env return = -100.79


 50%|█████████████████▍                 | 3981/8000 [1:18:06<1:14:07,  1.11s/it]

Episode 3981/8000, real env return = -111.88


 50%|█████████████████▍                 | 3991/8000 [1:18:18<1:17:52,  1.17s/it]

Episode 3991/8000, real env return = -111.80


 50%|█████████████████▌                 | 4001/8000 [1:18:29<1:17:06,  1.16s/it]

Episode 4001/8000, real env return = -111.61


 50%|█████████████████▌                 | 4011/8000 [1:18:41<1:12:47,  1.09s/it]

Episode 4011/8000, real env return = -102.61


 50%|█████████████████▌                 | 4021/8000 [1:18:53<1:21:16,  1.23s/it]

Episode 4021/8000, real env return = -100.75


 50%|█████████████████▋                 | 4031/8000 [1:19:04<1:13:10,  1.11s/it]

Episode 4031/8000, real env return = -112.65


 51%|█████████████████▋                 | 4041/8000 [1:19:15<1:19:10,  1.20s/it]

Episode 4041/8000, real env return = -112.16


 51%|█████████████████▋                 | 4051/8000 [1:19:27<1:17:54,  1.18s/it]

Episode 4051/8000, real env return = -100.33


 51%|█████████████████▊                 | 4061/8000 [1:19:38<1:14:17,  1.13s/it]

Episode 4061/8000, real env return = -100.36


 51%|█████████████████▊                 | 4071/8000 [1:19:50<1:16:46,  1.17s/it]

Episode 4071/8000, real env return = -110.85


 51%|█████████████████▊                 | 4081/8000 [1:20:01<1:14:05,  1.13s/it]

Episode 4081/8000, real env return = -111.01


 51%|█████████████████▉                 | 4091/8000 [1:20:13<1:14:10,  1.14s/it]

Episode 4091/8000, real env return = -100.91


 51%|█████████████████▉                 | 4101/8000 [1:20:24<1:13:58,  1.14s/it]

Episode 4101/8000, real env return = -102.59


 51%|█████████████████▉                 | 4111/8000 [1:20:35<1:13:09,  1.13s/it]

Episode 4111/8000, real env return = -112.10


 52%|██████████████████                 | 4121/8000 [1:20:47<1:14:07,  1.15s/it]

Episode 4121/8000, real env return = -112.07


 52%|██████████████████                 | 4131/8000 [1:20:58<1:15:18,  1.17s/it]

Episode 4131/8000, real env return = -101.99


 52%|██████████████████                 | 4141/8000 [1:21:10<1:11:53,  1.12s/it]

Episode 4141/8000, real env return = -101.99


 52%|██████████████████▏                | 4151/8000 [1:21:21<1:14:29,  1.16s/it]

Episode 4151/8000, real env return = -112.55


 52%|██████████████████▏                | 4161/8000 [1:21:32<1:10:50,  1.11s/it]

Episode 4161/8000, real env return = -111.48


 52%|██████████████████▏                | 4171/8000 [1:21:44<1:10:37,  1.11s/it]

Episode 4171/8000, real env return = -101.96


 52%|██████████████████▎                | 4181/8000 [1:21:55<1:15:17,  1.18s/it]

Episode 4181/8000, real env return = -102.30


 52%|██████████████████▎                | 4191/8000 [1:22:07<1:09:38,  1.10s/it]

Episode 4191/8000, real env return = -110.00


 53%|██████████████████▍                | 4201/8000 [1:22:19<1:21:57,  1.29s/it]

Episode 4201/8000, real env return = -99.50


 53%|██████████████████▍                | 4211/8000 [1:22:30<1:12:09,  1.14s/it]

Episode 4211/8000, real env return = -100.05


 53%|██████████████████▍                | 4221/8000 [1:22:42<1:10:14,  1.12s/it]

Episode 4221/8000, real env return = -100.25


 53%|██████████████████▌                | 4231/8000 [1:22:54<1:21:20,  1.30s/it]

Episode 4231/8000, real env return = -100.26


 53%|██████████████████▌                | 4241/8000 [1:23:06<1:10:39,  1.13s/it]

Episode 4241/8000, real env return = -111.08


 53%|██████████████████▌                | 4251/8000 [1:23:18<1:16:53,  1.23s/it]

Episode 4251/8000, real env return = -101.47


 53%|██████████████████▋                | 4261/8000 [1:23:30<1:12:07,  1.16s/it]

Episode 4261/8000, real env return = -101.82


 53%|██████████████████▋                | 4271/8000 [1:23:41<1:06:43,  1.07s/it]

Episode 4271/8000, real env return = -112.90


 54%|██████████████████▋                | 4281/8000 [1:23:53<1:16:01,  1.23s/it]

Episode 4281/8000, real env return = -111.51


 54%|██████████████████▊                | 4291/8000 [1:24:04<1:11:18,  1.15s/it]

Episode 4291/8000, real env return = -102.27


 54%|██████████████████▊                | 4301/8000 [1:24:16<1:10:04,  1.14s/it]

Episode 4301/8000, real env return = -102.48


 54%|██████████████████▊                | 4311/8000 [1:24:28<1:11:55,  1.17s/it]

Episode 4311/8000, real env return = -112.48


 54%|██████████████████▉                | 4321/8000 [1:24:39<1:07:20,  1.10s/it]

Episode 4321/8000, real env return = -112.28


 54%|██████████████████▉                | 4331/8000 [1:24:51<1:10:58,  1.16s/it]

Episode 4331/8000, real env return = -102.41


 54%|██████████████████▉                | 4341/8000 [1:25:02<1:10:45,  1.16s/it]

Episode 4341/8000, real env return = -101.47


 54%|███████████████████                | 4351/8000 [1:25:15<1:12:34,  1.19s/it]

Episode 4351/8000, real env return = -110.08


 55%|███████████████████                | 4361/8000 [1:25:26<1:11:26,  1.18s/it]

Episode 4361/8000, real env return = -109.10


 55%|███████████████████                | 4371/8000 [1:25:38<1:10:39,  1.17s/it]

Episode 4371/8000, real env return = -101.62


 55%|███████████████████▏               | 4381/8000 [1:25:49<1:08:03,  1.13s/it]

Episode 4381/8000, real env return = -101.21


 55%|███████████████████▏               | 4391/8000 [1:26:00<1:08:45,  1.14s/it]

Episode 4391/8000, real env return = -112.05


 55%|███████████████████▎               | 4401/8000 [1:26:12<1:09:02,  1.15s/it]

Episode 4401/8000, real env return = -112.49


 55%|███████████████████▎               | 4411/8000 [1:26:24<1:11:29,  1.20s/it]

Episode 4411/8000, real env return = -100.13


 55%|███████████████████▎               | 4421/8000 [1:26:35<1:11:02,  1.19s/it]

Episode 4421/8000, real env return = -102.09


 55%|███████████████████▍               | 4431/8000 [1:26:47<1:07:04,  1.13s/it]

Episode 4431/8000, real env return = -99.09


 56%|███████████████████▍               | 4441/8000 [1:26:59<1:12:24,  1.22s/it]

Episode 4441/8000, real env return = -111.76


 56%|███████████████████▍               | 4451/8000 [1:27:10<1:05:54,  1.11s/it]

Episode 4451/8000, real env return = -102.52


 56%|███████████████████▌               | 4461/8000 [1:27:22<1:10:05,  1.19s/it]

Episode 4461/8000, real env return = -101.50


 56%|███████████████████▌               | 4471/8000 [1:27:34<1:09:25,  1.18s/it]

Episode 4471/8000, real env return = -102.20


 56%|███████████████████▌               | 4481/8000 [1:27:45<1:07:01,  1.14s/it]

Episode 4481/8000, real env return = -111.68


 56%|███████████████████▋               | 4491/8000 [1:27:57<1:08:58,  1.18s/it]

Episode 4491/8000, real env return = -103.43


 56%|███████████████████▋               | 4501/8000 [1:28:09<1:09:53,  1.20s/it]

Episode 4501/8000, real env return = -103.50


 56%|███████████████████▋               | 4511/8000 [1:28:21<1:05:24,  1.12s/it]

Episode 4511/8000, real env return = -103.08


 57%|███████████████████▊               | 4521/8000 [1:28:32<1:08:52,  1.19s/it]

Episode 4521/8000, real env return = -110.28


 57%|███████████████████▊               | 4531/8000 [1:28:44<1:06:51,  1.16s/it]

Episode 4531/8000, real env return = -101.44


 57%|███████████████████▊               | 4541/8000 [1:28:56<1:06:38,  1.16s/it]

Episode 4541/8000, real env return = -103.08


 57%|███████████████████▉               | 4551/8000 [1:29:07<1:06:49,  1.16s/it]

Episode 4551/8000, real env return = -102.22


 57%|███████████████████▉               | 4561/8000 [1:29:18<1:01:59,  1.08s/it]

Episode 4561/8000, real env return = -111.32


 57%|███████████████████▉               | 4571/8000 [1:29:30<1:10:33,  1.23s/it]

Episode 4571/8000, real env return = -102.33


 57%|████████████████████               | 4581/8000 [1:29:41<1:06:24,  1.17s/it]

Episode 4581/8000, real env return = -102.99


 57%|████████████████████               | 4591/8000 [1:29:53<1:07:18,  1.18s/it]

Episode 4591/8000, real env return = -103.40


 58%|████████████████████▏              | 4601/8000 [1:30:05<1:06:03,  1.17s/it]

Episode 4601/8000, real env return = -110.63


 58%|████████████████████▏              | 4611/8000 [1:30:16<1:04:09,  1.14s/it]

Episode 4611/8000, real env return = -102.18


 58%|████████████████████▏              | 4621/8000 [1:30:28<1:04:25,  1.14s/it]

Episode 4621/8000, real env return = -103.36


 58%|████████████████████▎              | 4631/8000 [1:30:40<1:04:32,  1.15s/it]

Episode 4631/8000, real env return = -101.78


 58%|████████████████████▎              | 4641/8000 [1:30:51<1:03:39,  1.14s/it]

Episode 4641/8000, real env return = -109.27


 58%|████████████████████▎              | 4651/8000 [1:31:03<1:07:26,  1.21s/it]

Episode 4651/8000, real env return = -102.94


 58%|████████████████████▍              | 4661/8000 [1:31:15<1:06:08,  1.19s/it]

Episode 4661/8000, real env return = -103.20


 58%|████████████████████▍              | 4671/8000 [1:31:26<1:02:36,  1.13s/it]

Episode 4671/8000, real env return = -102.67


 59%|████████████████████▍              | 4681/8000 [1:31:38<1:08:55,  1.25s/it]

Episode 4681/8000, real env return = -111.99


 59%|████████████████████▌              | 4691/8000 [1:31:50<1:02:29,  1.13s/it]

Episode 4691/8000, real env return = -104.01


 59%|████████████████████▌              | 4701/8000 [1:32:02<1:07:44,  1.23s/it]

Episode 4701/8000, real env return = -106.06


 59%|████████████████████▌              | 4711/8000 [1:32:13<1:02:54,  1.15s/it]

Episode 4711/8000, real env return = -104.78


 59%|████████████████████▋              | 4721/8000 [1:32:24<1:00:19,  1.10s/it]

Episode 4721/8000, real env return = -112.20


 59%|████████████████████▋              | 4731/8000 [1:32:36<1:04:50,  1.19s/it]

Episode 4731/8000, real env return = -103.67


 59%|████████████████████▋              | 4741/8000 [1:32:48<1:04:14,  1.18s/it]

Episode 4741/8000, real env return = -102.16


 59%|████████████████████▊              | 4751/8000 [1:33:00<1:01:57,  1.14s/it]

Episode 4751/8000, real env return = -101.63


 60%|████████████████████▊              | 4761/8000 [1:33:11<1:02:39,  1.16s/it]

Episode 4761/8000, real env return = -112.79


 60%|████████████████████▊              | 4771/8000 [1:33:23<1:03:46,  1.19s/it]

Episode 4771/8000, real env return = -101.53


 60%|████████████████████▉              | 4781/8000 [1:33:35<1:03:30,  1.18s/it]

Episode 4781/8000, real env return = -101.26


 60%|████████████████████▉              | 4791/8000 [1:33:47<1:06:44,  1.25s/it]

Episode 4791/8000, real env return = -101.21


 60%|██████████████████████▏              | 4801/8000 [1:33:58<59:18,  1.11s/it]

Episode 4801/8000, real env return = -111.35


 60%|█████████████████████              | 4811/8000 [1:34:10<1:07:06,  1.26s/it]

Episode 4811/8000, real env return = -101.97


 60%|█████████████████████              | 4821/8000 [1:34:21<1:00:50,  1.15s/it]

Episode 4821/8000, real env return = -103.51


 60%|██████████████████████▎              | 4831/8000 [1:34:33<59:50,  1.13s/it]

Episode 4831/8000, real env return = -100.73


 61%|█████████████████████▏             | 4841/8000 [1:34:45<1:02:40,  1.19s/it]

Episode 4841/8000, real env return = -102.74


 61%|█████████████████████▏             | 4851/8000 [1:34:56<1:01:33,  1.17s/it]

Episode 4851/8000, real env return = -101.98


 61%|█████████████████████▎             | 4861/8000 [1:35:09<1:01:41,  1.18s/it]

Episode 4861/8000, real env return = -102.97


 61%|█████████████████████▎             | 4871/8000 [1:35:20<1:02:42,  1.20s/it]

Episode 4871/8000, real env return = -101.18


 61%|█████████████████████▎             | 4881/8000 [1:35:32<1:00:22,  1.16s/it]

Episode 4881/8000, real env return = -103.45


 61%|██████████████████████▌              | 4891/8000 [1:35:44<58:47,  1.13s/it]

Episode 4891/8000, real env return = -111.68


 61%|█████████████████████▍             | 4901/8000 [1:35:55<1:00:30,  1.17s/it]

Episode 4901/8000, real env return = -111.58


 61%|██████████████████████▋              | 4911/8000 [1:36:07<58:18,  1.13s/it]

Episode 4911/8000, real env return = -112.41


 62%|█████████████████████▌             | 4921/8000 [1:36:19<1:04:07,  1.25s/it]

Episode 4921/8000, real env return = -111.35


 62%|██████████████████████▊              | 4931/8000 [1:36:30<55:24,  1.08s/it]

Episode 4931/8000, real env return = -110.20


 62%|██████████████████████▊              | 4941/8000 [1:36:41<58:02,  1.14s/it]

Episode 4941/8000, real env return = -111.05


 62%|██████████████████████▉              | 4951/8000 [1:36:53<57:29,  1.13s/it]

Episode 4951/8000, real env return = -112.00


 62%|██████████████████████▉              | 4961/8000 [1:37:04<54:30,  1.08s/it]

Episode 4961/8000, real env return = -111.28


 62%|█████████████████████▋             | 4971/8000 [1:37:16<1:00:24,  1.20s/it]

Episode 4971/8000, real env return = -112.12


 62%|███████████████████████              | 4981/8000 [1:37:27<57:19,  1.14s/it]

Episode 4981/8000, real env return = -112.07


 62%|███████████████████████              | 4991/8000 [1:37:39<59:27,  1.19s/it]

Episode 4991/8000, real env return = -111.65


 63%|███████████████████████▏             | 5001/8000 [1:37:51<58:29,  1.17s/it]

Episode 5001/8000, real env return = -111.80


 63%|███████████████████████▏             | 5011/8000 [1:38:02<56:35,  1.14s/it]

Episode 5011/8000, real env return = -111.60


 63%|███████████████████████▏             | 5021/8000 [1:38:14<56:44,  1.14s/it]

Episode 5021/8000, real env return = -112.39


 63%|███████████████████████▎             | 5031/8000 [1:38:25<56:36,  1.14s/it]

Episode 5031/8000, real env return = -112.34


 63%|███████████████████████▎             | 5041/8000 [1:38:36<56:15,  1.14s/it]

Episode 5041/8000, real env return = -111.54


 63%|███████████████████████▎             | 5051/8000 [1:38:48<57:43,  1.17s/it]

Episode 5051/8000, real env return = -112.23


 63%|███████████████████████▍             | 5061/8000 [1:39:00<59:19,  1.21s/it]

Episode 5061/8000, real env return = -98.71


 63%|███████████████████████▍             | 5071/8000 [1:39:12<57:35,  1.18s/it]

Episode 5071/8000, real env return = -99.31


 64%|███████████████████████▍             | 5081/8000 [1:39:24<59:37,  1.23s/it]

Episode 5081/8000, real env return = -100.25


 64%|███████████████████████▌             | 5091/8000 [1:39:35<57:31,  1.19s/it]

Episode 5091/8000, real env return = -99.36


 64%|███████████████████████▌             | 5101/8000 [1:39:48<59:53,  1.24s/it]

Episode 5101/8000, real env return = -99.42


 64%|███████████████████████▋             | 5111/8000 [1:40:00<59:10,  1.23s/it]

Episode 5111/8000, real env return = -99.83


 64%|███████████████████████▋             | 5121/8000 [1:40:12<56:06,  1.17s/it]

Episode 5121/8000, real env return = -98.97


 64%|███████████████████████▋             | 5131/8000 [1:40:24<57:53,  1.21s/it]

Episode 5131/8000, real env return = -99.87


 64%|███████████████████████▊             | 5141/8000 [1:40:36<59:09,  1.24s/it]

Episode 5141/8000, real env return = -99.44


 64%|███████████████████████▊             | 5151/8000 [1:40:47<53:22,  1.12s/it]

Episode 5151/8000, real env return = -100.58


 65%|██████████████████████▌            | 5161/8000 [1:40:59<1:00:25,  1.28s/it]

Episode 5161/8000, real env return = -99.73


 65%|███████████████████████▉             | 5171/8000 [1:41:11<54:21,  1.15s/it]

Episode 5171/8000, real env return = -99.06


 65%|███████████████████████▉             | 5181/8000 [1:41:23<58:03,  1.24s/it]

Episode 5181/8000, real env return = -100.18


 65%|████████████████████████             | 5191/8000 [1:41:35<56:34,  1.21s/it]

Episode 5191/8000, real env return = -99.40


 65%|████████████████████████             | 5201/8000 [1:41:48<57:31,  1.23s/it]

Episode 5201/8000, real env return = -99.56


 65%|████████████████████████             | 5211/8000 [1:42:00<55:15,  1.19s/it]

Episode 5211/8000, real env return = -99.76


 65%|████████████████████████▏            | 5221/8000 [1:42:11<56:22,  1.22s/it]

Episode 5221/8000, real env return = -99.57


 65%|████████████████████████▏            | 5231/8000 [1:42:23<53:09,  1.15s/it]

Episode 5231/8000, real env return = -100.61


 66%|████████████████████████▏            | 5241/8000 [1:42:36<57:54,  1.26s/it]

Episode 5241/8000, real env return = -99.88


 66%|████████████████████████▎            | 5251/8000 [1:42:47<54:52,  1.20s/it]

Episode 5251/8000, real env return = -102.10


 66%|████████████████████████▎            | 5261/8000 [1:42:59<55:48,  1.22s/it]

Episode 5261/8000, real env return = -99.70


 66%|████████████████████████▍            | 5271/8000 [1:43:11<54:17,  1.19s/it]

Episode 5271/8000, real env return = -99.98


 66%|████████████████████████▍            | 5281/8000 [1:43:23<56:11,  1.24s/it]

Episode 5281/8000, real env return = -99.13


 66%|████████████████████████▍            | 5291/8000 [1:43:34<49:51,  1.10s/it]

Episode 5291/8000, real env return = -103.31


 66%|████████████████████████▌            | 5301/8000 [1:43:46<53:21,  1.19s/it]

Episode 5301/8000, real env return = -100.01


 66%|████████████████████████▌            | 5311/8000 [1:43:58<49:49,  1.11s/it]

Episode 5311/8000, real env return = -110.46


 67%|████████████████████████▌            | 5321/8000 [1:44:09<49:42,  1.11s/it]

Episode 5321/8000, real env return = -111.04


 67%|████████████████████████▋            | 5331/8000 [1:44:21<53:09,  1.19s/it]

Episode 5331/8000, real env return = -101.30


 67%|████████████████████████▋            | 5341/8000 [1:44:33<49:33,  1.12s/it]

Episode 5341/8000, real env return = -112.13


 67%|████████████████████████▋            | 5351/8000 [1:44:44<53:33,  1.21s/it]

Episode 5351/8000, real env return = -112.62


 67%|████████████████████████▊            | 5361/8000 [1:44:56<49:54,  1.13s/it]

Episode 5361/8000, real env return = -112.12


 67%|████████████████████████▊            | 5371/8000 [1:45:08<52:35,  1.20s/it]

Episode 5371/8000, real env return = -101.02


 67%|████████████████████████▉            | 5381/8000 [1:45:19<52:05,  1.19s/it]

Episode 5381/8000, real env return = -99.45


 67%|████████████████████████▉            | 5391/8000 [1:45:31<53:14,  1.22s/it]

Episode 5391/8000, real env return = -99.68


 68%|████████████████████████▉            | 5401/8000 [1:45:43<50:37,  1.17s/it]

Episode 5401/8000, real env return = -109.51


 68%|█████████████████████████            | 5411/8000 [1:45:55<53:52,  1.25s/it]

Episode 5411/8000, real env return = -101.88


 68%|█████████████████████████            | 5421/8000 [1:46:07<48:22,  1.13s/it]

Episode 5421/8000, real env return = -110.69


 68%|█████████████████████████            | 5431/8000 [1:46:19<51:53,  1.21s/it]

Episode 5431/8000, real env return = -110.81


 68%|█████████████████████████▏           | 5441/8000 [1:46:30<48:14,  1.13s/it]

Episode 5441/8000, real env return = -110.46


 68%|█████████████████████████▏           | 5451/8000 [1:46:41<47:04,  1.11s/it]

Episode 5451/8000, real env return = -103.27


 68%|█████████████████████████▎           | 5461/8000 [1:46:53<50:10,  1.19s/it]

Episode 5461/8000, real env return = -110.35


 68%|█████████████████████████▎           | 5471/8000 [1:47:04<46:14,  1.10s/it]

Episode 5471/8000, real env return = -110.56


 69%|█████████████████████████▎           | 5481/8000 [1:47:16<48:57,  1.17s/it]

Episode 5481/8000, real env return = -110.56


 69%|█████████████████████████▍           | 5491/8000 [1:47:27<47:58,  1.15s/it]

Episode 5491/8000, real env return = -103.15


 69%|█████████████████████████▍           | 5501/8000 [1:47:38<45:03,  1.08s/it]

Episode 5501/8000, real env return = -110.84


 69%|█████████████████████████▍           | 5511/8000 [1:47:50<49:24,  1.19s/it]

Episode 5511/8000, real env return = -110.72


 69%|█████████████████████████▌           | 5521/8000 [1:48:01<47:03,  1.14s/it]

Episode 5521/8000, real env return = -110.80


 69%|█████████████████████████▌           | 5531/8000 [1:48:13<47:53,  1.16s/it]

Episode 5531/8000, real env return = -102.86


 69%|█████████████████████████▋           | 5541/8000 [1:48:25<49:04,  1.20s/it]

Episode 5541/8000, real env return = -110.56


 69%|█████████████████████████▋           | 5551/8000 [1:48:36<45:21,  1.11s/it]

Episode 5551/8000, real env return = -111.12


 70%|█████████████████████████▋           | 5561/8000 [1:48:48<48:25,  1.19s/it]

Episode 5561/8000, real env return = -111.19


 70%|█████████████████████████▊           | 5571/8000 [1:48:59<46:46,  1.16s/it]

Episode 5571/8000, real env return = -103.25


 70%|█████████████████████████▊           | 5581/8000 [1:49:11<47:58,  1.19s/it]

Episode 5581/8000, real env return = -109.13


 70%|█████████████████████████▊           | 5591/8000 [1:49:23<46:35,  1.16s/it]

Episode 5591/8000, real env return = -109.26


 70%|█████████████████████████▉           | 5601/8000 [1:49:35<47:49,  1.20s/it]

Episode 5601/8000, real env return = -108.78


 70%|█████████████████████████▉           | 5611/8000 [1:49:46<45:29,  1.14s/it]

Episode 5611/8000, real env return = -102.24


 70%|█████████████████████████▉           | 5621/8000 [1:49:58<47:29,  1.20s/it]

Episode 5621/8000, real env return = -109.05


 70%|██████████████████████████           | 5631/8000 [1:50:09<43:05,  1.09s/it]

Episode 5631/8000, real env return = -109.29


 71%|██████████████████████████           | 5641/8000 [1:50:21<46:19,  1.18s/it]

Episode 5641/8000, real env return = -109.28


 71%|██████████████████████████▏          | 5651/8000 [1:50:33<45:20,  1.16s/it]

Episode 5651/8000, real env return = -103.29


 71%|██████████████████████████▏          | 5661/8000 [1:50:44<43:31,  1.12s/it]

Episode 5661/8000, real env return = -101.01


 71%|██████████████████████████▏          | 5671/8000 [1:50:56<46:27,  1.20s/it]

Episode 5671/8000, real env return = -109.53


 71%|██████████████████████████▎          | 5681/8000 [1:51:07<41:35,  1.08s/it]

Episode 5681/8000, real env return = -109.31


 71%|██████████████████████████▎          | 5691/8000 [1:51:19<46:39,  1.21s/it]

Episode 5691/8000, real env return = -104.46


 71%|██████████████████████████▎          | 5701/8000 [1:51:30<44:28,  1.16s/it]

Episode 5701/8000, real env return = -102.89


 71%|██████████████████████████▍          | 5711/8000 [1:51:42<43:24,  1.14s/it]

Episode 5711/8000, real env return = -109.26


 72%|██████████████████████████▍          | 5721/8000 [1:51:53<43:53,  1.16s/it]

Episode 5721/8000, real env return = -109.23


 72%|██████████████████████████▌          | 5731/8000 [1:52:04<42:09,  1.11s/it]

Episode 5731/8000, real env return = -101.96


 72%|██████████████████████████▌          | 5741/8000 [1:52:16<44:16,  1.18s/it]

Episode 5741/8000, real env return = -101.32


 72%|██████████████████████████▌          | 5751/8000 [1:52:28<45:12,  1.21s/it]

Episode 5751/8000, real env return = -98.28


 72%|██████████████████████████▋          | 5761/8000 [1:52:40<42:02,  1.13s/it]

Episode 5761/8000, real env return = -109.88


 72%|██████████████████████████▋          | 5771/8000 [1:52:51<43:46,  1.18s/it]

Episode 5771/8000, real env return = -100.52


 72%|██████████████████████████▋          | 5781/8000 [1:53:03<44:23,  1.20s/it]

Episode 5781/8000, real env return = -100.48


 72%|██████████████████████████▊          | 5791/8000 [1:53:14<40:50,  1.11s/it]

Episode 5791/8000, real env return = -108.71


 73%|██████████████████████████▊          | 5801/8000 [1:53:26<43:33,  1.19s/it]

Episode 5801/8000, real env return = -108.90


 73%|██████████████████████████▉          | 5811/8000 [1:53:38<42:16,  1.16s/it]

Episode 5811/8000, real env return = -99.94


 73%|██████████████████████████▉          | 5821/8000 [1:53:49<41:54,  1.15s/it]

Episode 5821/8000, real env return = -103.61


 73%|██████████████████████████▉          | 5831/8000 [1:54:01<42:58,  1.19s/it]

Episode 5831/8000, real env return = -108.23


 73%|███████████████████████████          | 5841/8000 [1:54:12<39:59,  1.11s/it]

Episode 5841/8000, real env return = -109.23


 73%|███████████████████████████          | 5851/8000 [1:54:24<43:00,  1.20s/it]

Episode 5851/8000, real env return = -100.61


 73%|███████████████████████████          | 5861/8000 [1:54:36<41:56,  1.18s/it]

Episode 5861/8000, real env return = -98.20


 73%|███████████████████████████▏         | 5871/8000 [1:54:48<42:07,  1.19s/it]

Episode 5871/8000, real env return = -109.01


 74%|███████████████████████████▏         | 5881/8000 [1:54:59<40:55,  1.16s/it]

Episode 5881/8000, real env return = -109.20


 74%|███████████████████████████▏         | 5891/8000 [1:55:11<42:39,  1.21s/it]

Episode 5891/8000, real env return = -99.60


 74%|███████████████████████████▎         | 5901/8000 [1:55:23<39:40,  1.13s/it]

Episode 5901/8000, real env return = -100.03


 74%|███████████████████████████▎         | 5911/8000 [1:55:35<41:26,  1.19s/it]

Episode 5911/8000, real env return = -109.63


 74%|███████████████████████████▍         | 5921/8000 [1:55:46<37:51,  1.09s/it]

Episode 5921/8000, real env return = -109.27


 74%|███████████████████████████▍         | 5931/8000 [1:55:58<41:48,  1.21s/it]

Episode 5931/8000, real env return = -98.93


 74%|███████████████████████████▍         | 5941/8000 [1:56:09<40:45,  1.19s/it]

Episode 5941/8000, real env return = -98.86


 74%|███████████████████████████▌         | 5951/8000 [1:56:21<38:57,  1.14s/it]

Episode 5951/8000, real env return = -109.59


 75%|███████████████████████████▌         | 5961/8000 [1:56:33<40:02,  1.18s/it]

Episode 5961/8000, real env return = -109.57


 75%|███████████████████████████▌         | 5971/8000 [1:56:44<38:54,  1.15s/it]

Episode 5971/8000, real env return = -100.08


 75%|███████████████████████████▋         | 5981/8000 [1:56:56<40:13,  1.20s/it]

Episode 5981/8000, real env return = -99.10


 75%|███████████████████████████▋         | 5991/8000 [1:57:08<39:16,  1.17s/it]

Episode 5991/8000, real env return = -109.41


 75%|███████████████████████████▊         | 6001/8000 [1:57:19<39:20,  1.18s/it]

Episode 6001/8000, real env return = -109.10


 75%|███████████████████████████▊         | 6011/8000 [1:57:31<38:42,  1.17s/it]

Episode 6011/8000, real env return = -100.81


 75%|███████████████████████████▊         | 6021/8000 [1:57:43<40:38,  1.23s/it]

Episode 6021/8000, real env return = -99.43


 75%|███████████████████████████▉         | 6031/8000 [1:57:54<36:38,  1.12s/it]

Episode 6031/8000, real env return = -109.08


 76%|███████████████████████████▉         | 6041/8000 [1:58:06<39:08,  1.20s/it]

Episode 6041/8000, real env return = -109.39


 76%|███████████████████████████▉         | 6051/8000 [1:58:18<38:34,  1.19s/it]

Episode 6051/8000, real env return = -100.27


 76%|████████████████████████████         | 6061/8000 [1:58:30<36:12,  1.12s/it]

Episode 6061/8000, real env return = -99.08


 76%|████████████████████████████         | 6071/8000 [1:58:41<37:36,  1.17s/it]

Episode 6071/8000, real env return = -100.31


 76%|████████████████████████████         | 6081/8000 [1:58:52<35:35,  1.11s/it]

Episode 6081/8000, real env return = -109.12


 76%|████████████████████████████▏        | 6091/8000 [1:59:04<35:19,  1.11s/it]

Episode 6091/8000, real env return = -100.42


 76%|████████████████████████████▏        | 6101/8000 [1:59:16<39:19,  1.24s/it]

Episode 6101/8000, real env return = -100.09


 76%|████████████████████████████▎        | 6111/8000 [1:59:28<36:06,  1.15s/it]

Episode 6111/8000, real env return = -100.38


 77%|████████████████████████████▎        | 6121/8000 [1:59:40<39:32,  1.26s/it]

Episode 6121/8000, real env return = -109.08


 77%|████████████████████████████▎        | 6131/8000 [1:59:51<37:07,  1.19s/it]

Episode 6131/8000, real env return = -98.93


 77%|████████████████████████████▍        | 6141/8000 [2:00:03<37:36,  1.21s/it]

Episode 6141/8000, real env return = -100.77


 77%|████████████████████████████▍        | 6151/8000 [2:00:15<36:45,  1.19s/it]

Episode 6151/8000, real env return = -101.02


 77%|████████████████████████████▍        | 6161/8000 [2:00:26<33:34,  1.10s/it]

Episode 6161/8000, real env return = -108.99


 77%|████████████████████████████▌        | 6171/8000 [2:00:37<34:46,  1.14s/it]

Episode 6171/8000, real env return = -101.30


 77%|████████████████████████████▌        | 6181/8000 [2:00:49<34:58,  1.15s/it]

Episode 6181/8000, real env return = -101.15


 77%|████████████████████████████▋        | 6191/8000 [2:01:00<34:20,  1.14s/it]

Episode 6191/8000, real env return = -100.21


 78%|████████████████████████████▋        | 6201/8000 [2:01:12<33:52,  1.13s/it]

Episode 6201/8000, real env return = -108.93


 78%|████████████████████████████▋        | 6211/8000 [2:01:23<34:53,  1.17s/it]

Episode 6211/8000, real env return = -100.67


 78%|████████████████████████████▊        | 6221/8000 [2:01:35<32:41,  1.10s/it]

Episode 6221/8000, real env return = -100.91


 78%|████████████████████████████▊        | 6231/8000 [2:01:46<35:23,  1.20s/it]

Episode 6231/8000, real env return = -100.48


 78%|████████████████████████████▊        | 6241/8000 [2:01:58<32:24,  1.11s/it]

Episode 6241/8000, real env return = -110.19


 78%|████████████████████████████▉        | 6251/8000 [2:02:09<33:19,  1.14s/it]

Episode 6251/8000, real env return = -100.18


 78%|████████████████████████████▉        | 6261/8000 [2:02:20<33:52,  1.17s/it]

Episode 6261/8000, real env return = -100.61


 78%|█████████████████████████████        | 6271/8000 [2:02:32<32:17,  1.12s/it]

Episode 6271/8000, real env return = -99.71


 79%|█████████████████████████████        | 6281/8000 [2:02:44<34:13,  1.19s/it]

Episode 6281/8000, real env return = -110.25


 79%|█████████████████████████████        | 6291/8000 [2:02:54<30:49,  1.08s/it]

Episode 6291/8000, real env return = -100.39


 79%|█████████████████████████████▏       | 6301/8000 [2:03:06<31:04,  1.10s/it]

Episode 6301/8000, real env return = -101.13


 79%|█████████████████████████████▏       | 6311/8000 [2:03:18<32:35,  1.16s/it]

Episode 6311/8000, real env return = -101.23


 79%|█████████████████████████████▏       | 6321/8000 [2:03:29<30:37,  1.09s/it]

Episode 6321/8000, real env return = -109.10


 79%|█████████████████████████████▎       | 6331/8000 [2:03:41<32:42,  1.18s/it]

Episode 6331/8000, real env return = -99.03


 79%|█████████████████████████████▎       | 6341/8000 [2:03:52<31:49,  1.15s/it]

Episode 6341/8000, real env return = -101.71


 79%|█████████████████████████████▎       | 6351/8000 [2:04:03<30:42,  1.12s/it]

Episode 6351/8000, real env return = -100.91


 80%|█████████████████████████████▍       | 6361/8000 [2:04:15<31:16,  1.14s/it]

Episode 6361/8000, real env return = -109.04


 80%|█████████████████████████████▍       | 6371/8000 [2:04:27<31:41,  1.17s/it]

Episode 6371/8000, real env return = -99.77


 80%|█████████████████████████████▌       | 6381/8000 [2:04:38<30:34,  1.13s/it]

Episode 6381/8000, real env return = -100.86


 80%|█████████████████████████████▌       | 6391/8000 [2:04:50<31:56,  1.19s/it]

Episode 6391/8000, real env return = -100.75


 80%|█████████████████████████████▌       | 6401/8000 [2:05:01<30:28,  1.14s/it]

Episode 6401/8000, real env return = -109.12


 80%|█████████████████████████████▋       | 6411/8000 [2:05:13<30:24,  1.15s/it]

Episode 6411/8000, real env return = -101.91


 80%|█████████████████████████████▋       | 6421/8000 [2:05:25<31:08,  1.18s/it]

Episode 6421/8000, real env return = -98.71


 80%|█████████████████████████████▋       | 6431/8000 [2:05:36<29:49,  1.14s/it]

Episode 6431/8000, real env return = -101.23


 81%|█████████████████████████████▊       | 6441/8000 [2:05:48<31:45,  1.22s/it]

Episode 6441/8000, real env return = -109.37


 81%|█████████████████████████████▊       | 6451/8000 [2:06:00<29:22,  1.14s/it]

Episode 6451/8000, real env return = -101.08


 81%|█████████████████████████████▉       | 6461/8000 [2:06:12<29:07,  1.14s/it]

Episode 6461/8000, real env return = -101.25


 81%|█████████████████████████████▉       | 6471/8000 [2:06:23<30:18,  1.19s/it]

Episode 6471/8000, real env return = -101.99


 81%|█████████████████████████████▉       | 6481/8000 [2:06:34<28:08,  1.11s/it]

Episode 6481/8000, real env return = -100.64


 81%|██████████████████████████████       | 6491/8000 [2:06:46<29:41,  1.18s/it]

Episode 6491/8000, real env return = -98.85


 81%|██████████████████████████████       | 6501/8000 [2:06:58<29:20,  1.17s/it]

Episode 6501/8000, real env return = -101.49


 81%|██████████████████████████████       | 6511/8000 [2:07:09<28:02,  1.13s/it]

Episode 6511/8000, real env return = -101.21


 82%|██████████████████████████████▏      | 6521/8000 [2:07:21<29:31,  1.20s/it]

Episode 6521/8000, real env return = -101.15


 82%|██████████████████████████████▏      | 6531/8000 [2:07:32<26:36,  1.09s/it]

Episode 6531/8000, real env return = -109.82


 82%|██████████████████████████████▎      | 6541/8000 [2:07:44<26:48,  1.10s/it]

Episode 6541/8000, real env return = -109.07


 82%|██████████████████████████████▎      | 6551/8000 [2:07:55<28:59,  1.20s/it]

Episode 6551/8000, real env return = -109.18


 82%|██████████████████████████████▎      | 6561/8000 [2:08:06<25:31,  1.06s/it]

Episode 6561/8000, real env return = -109.21


 82%|██████████████████████████████▍      | 6571/8000 [2:08:18<28:54,  1.21s/it]

Episode 6571/8000, real env return = -109.63


 82%|██████████████████████████████▍      | 6581/8000 [2:08:29<26:45,  1.13s/it]

Episode 6581/8000, real env return = -109.22


 82%|██████████████████████████████▍      | 6591/8000 [2:08:40<26:23,  1.12s/it]

Episode 6591/8000, real env return = -109.48


 83%|██████████████████████████████▌      | 6601/8000 [2:08:52<27:12,  1.17s/it]

Episode 6601/8000, real env return = -109.67


 83%|██████████████████████████████▌      | 6611/8000 [2:09:03<26:20,  1.14s/it]

Episode 6611/8000, real env return = -109.63


 83%|██████████████████████████████▌      | 6621/8000 [2:09:14<25:27,  1.11s/it]

Episode 6621/8000, real env return = -108.89


 83%|██████████████████████████████▋      | 6631/8000 [2:09:26<27:05,  1.19s/it]

Episode 6631/8000, real env return = -108.31


 83%|██████████████████████████████▋      | 6641/8000 [2:09:37<24:59,  1.10s/it]

Episode 6641/8000, real env return = -108.99


 83%|██████████████████████████████▊      | 6651/8000 [2:09:49<26:42,  1.19s/it]

Episode 6651/8000, real env return = -109.53


 83%|██████████████████████████████▊      | 6661/8000 [2:10:00<25:33,  1.15s/it]

Episode 6661/8000, real env return = -109.46


 83%|██████████████████████████████▊      | 6671/8000 [2:10:11<24:06,  1.09s/it]

Episode 6671/8000, real env return = -109.06


 84%|██████████████████████████████▉      | 6681/8000 [2:10:23<26:19,  1.20s/it]

Episode 6681/8000, real env return = -98.21


 84%|██████████████████████████████▉      | 6691/8000 [2:10:34<25:36,  1.17s/it]

Episode 6691/8000, real env return = -98.46


 84%|██████████████████████████████▉      | 6701/8000 [2:10:46<25:09,  1.16s/it]

Episode 6701/8000, real env return = -97.75


 84%|███████████████████████████████      | 6711/8000 [2:10:58<25:37,  1.19s/it]

Episode 6711/8000, real env return = -97.71


 84%|███████████████████████████████      | 6721/8000 [2:11:10<26:00,  1.22s/it]

Episode 6721/8000, real env return = -97.44


 84%|███████████████████████████████▏     | 6731/8000 [2:11:21<23:41,  1.12s/it]

Episode 6731/8000, real env return = -97.68


 84%|███████████████████████████████▏     | 6741/8000 [2:11:34<26:28,  1.26s/it]

Episode 6741/8000, real env return = -97.70


 84%|███████████████████████████████▏     | 6751/8000 [2:11:45<22:54,  1.10s/it]

Episode 6751/8000, real env return = -97.78


 85%|███████████████████████████████▎     | 6761/8000 [2:11:57<24:34,  1.19s/it]

Episode 6761/8000, real env return = -97.45


 85%|███████████████████████████████▎     | 6771/8000 [2:12:08<23:32,  1.15s/it]

Episode 6771/8000, real env return = -98.03


 85%|███████████████████████████████▎     | 6781/8000 [2:12:20<22:17,  1.10s/it]

Episode 6781/8000, real env return = -98.20


 85%|███████████████████████████████▍     | 6791/8000 [2:12:32<24:21,  1.21s/it]

Episode 6791/8000, real env return = -98.16


 85%|███████████████████████████████▍     | 6801/8000 [2:12:43<23:26,  1.17s/it]

Episode 6801/8000, real env return = -98.03


 85%|███████████████████████████████▌     | 6811/8000 [2:12:55<23:42,  1.20s/it]

Episode 6811/8000, real env return = -98.58


 85%|███████████████████████████████▌     | 6821/8000 [2:13:06<22:50,  1.16s/it]

Episode 6821/8000, real env return = -97.92


 85%|███████████████████████████████▌     | 6831/8000 [2:13:18<23:20,  1.20s/it]

Episode 6831/8000, real env return = -97.58


 86%|███████████████████████████████▋     | 6841/8000 [2:13:30<21:19,  1.10s/it]

Episode 6841/8000, real env return = -98.16


 86%|███████████████████████████████▋     | 6851/8000 [2:13:41<23:38,  1.23s/it]

Episode 6851/8000, real env return = -98.01


 86%|███████████████████████████████▋     | 6861/8000 [2:13:53<20:52,  1.10s/it]

Episode 6861/8000, real env return = -97.86


 86%|███████████████████████████████▊     | 6871/8000 [2:14:05<22:07,  1.18s/it]

Episode 6871/8000, real env return = -97.88


 86%|███████████████████████████████▊     | 6881/8000 [2:14:16<21:54,  1.17s/it]

Episode 6881/8000, real env return = -99.69


 86%|███████████████████████████████▊     | 6891/8000 [2:14:28<20:45,  1.12s/it]

Episode 6891/8000, real env return = -98.72


 86%|███████████████████████████████▉     | 6901/8000 [2:14:40<21:28,  1.17s/it]

Episode 6901/8000, real env return = -98.49


 86%|███████████████████████████████▉     | 6911/8000 [2:14:51<21:09,  1.17s/it]

Episode 6911/8000, real env return = -100.38


 87%|████████████████████████████████     | 6921/8000 [2:15:03<20:19,  1.13s/it]

Episode 6921/8000, real env return = -100.10


 87%|████████████████████████████████     | 6931/8000 [2:15:14<20:47,  1.17s/it]

Episode 6931/8000, real env return = -98.95


 87%|████████████████████████████████     | 6941/8000 [2:15:26<21:50,  1.24s/it]

Episode 6941/8000, real env return = -98.45


 87%|████████████████████████████████▏    | 6951/8000 [2:15:38<19:51,  1.14s/it]

Episode 6951/8000, real env return = -98.34


 87%|████████████████████████████████▏    | 6961/8000 [2:15:49<20:53,  1.21s/it]

Episode 6961/8000, real env return = -98.99


 87%|████████████████████████████████▏    | 6971/8000 [2:16:01<19:42,  1.15s/it]

Episode 6971/8000, real env return = -98.04


 87%|████████████████████████████████▎    | 6981/8000 [2:16:13<19:23,  1.14s/it]

Episode 6981/8000, real env return = -98.15


 87%|████████████████████████████████▎    | 6991/8000 [2:16:24<19:36,  1.17s/it]

Episode 6991/8000, real env return = -99.64


 88%|████████████████████████████████▍    | 7001/8000 [2:16:35<17:51,  1.07s/it]

Episode 7001/8000, real env return = -97.56


 88%|████████████████████████████████▍    | 7011/8000 [2:16:47<18:56,  1.15s/it]

Episode 7011/8000, real env return = -98.17


 88%|████████████████████████████████▍    | 7021/8000 [2:16:59<18:58,  1.16s/it]

Episode 7021/8000, real env return = -97.97


 88%|████████████████████████████████▌    | 7031/8000 [2:17:10<19:00,  1.18s/it]

Episode 7031/8000, real env return = -98.69


 88%|████████████████████████████████▌    | 7041/8000 [2:17:22<18:41,  1.17s/it]

Episode 7041/8000, real env return = -98.28


 88%|████████████████████████████████▌    | 7051/8000 [2:17:34<18:31,  1.17s/it]

Episode 7051/8000, real env return = -99.62


 88%|████████████████████████████████▋    | 7061/8000 [2:17:45<18:02,  1.15s/it]

Episode 7061/8000, real env return = -98.25


 88%|████████████████████████████████▋    | 7071/8000 [2:17:57<18:34,  1.20s/it]

Episode 7071/8000, real env return = -97.78


 89%|████████████████████████████████▋    | 7081/8000 [2:18:09<18:25,  1.20s/it]

Episode 7081/8000, real env return = -100.60


 89%|████████████████████████████████▊    | 7091/8000 [2:18:20<16:55,  1.12s/it]

Episode 7091/8000, real env return = -98.27


 89%|████████████████████████████████▊    | 7101/8000 [2:18:32<18:22,  1.23s/it]

Episode 7101/8000, real env return = -98.16


 89%|████████████████████████████████▉    | 7111/8000 [2:18:44<17:11,  1.16s/it]

Episode 7111/8000, real env return = -98.09


 89%|████████████████████████████████▉    | 7121/8000 [2:18:55<16:10,  1.10s/it]

Episode 7121/8000, real env return = -98.50


 89%|████████████████████████████████▉    | 7131/8000 [2:19:07<17:27,  1.21s/it]

Episode 7131/8000, real env return = -98.18


 89%|█████████████████████████████████    | 7141/8000 [2:19:19<16:02,  1.12s/it]

Episode 7141/8000, real env return = -98.04


 89%|█████████████████████████████████    | 7151/8000 [2:19:30<16:53,  1.19s/it]

Episode 7151/8000, real env return = -98.51


 90%|█████████████████████████████████    | 7161/8000 [2:19:42<16:13,  1.16s/it]

Episode 7161/8000, real env return = -99.00


 90%|█████████████████████████████████▏   | 7171/8000 [2:19:54<16:14,  1.18s/it]

Episode 7171/8000, real env return = -98.27


 90%|█████████████████████████████████▏   | 7181/8000 [2:20:06<15:53,  1.16s/it]

Episode 7181/8000, real env return = -98.23


 90%|█████████████████████████████████▎   | 7191/8000 [2:20:18<16:41,  1.24s/it]

Episode 7191/8000, real env return = -98.58


 90%|█████████████████████████████████▎   | 7201/8000 [2:20:29<15:12,  1.14s/it]

Episode 7201/8000, real env return = -98.65


 90%|█████████████████████████████████▎   | 7211/8000 [2:20:41<15:58,  1.21s/it]

Episode 7211/8000, real env return = -98.22


 90%|█████████████████████████████████▍   | 7221/8000 [2:20:52<14:45,  1.14s/it]

Episode 7221/8000, real env return = -98.18


 90%|█████████████████████████████████▍   | 7231/8000 [2:21:04<14:41,  1.15s/it]

Episode 7231/8000, real env return = -98.33


 91%|█████████████████████████████████▍   | 7241/8000 [2:21:16<14:37,  1.16s/it]

Episode 7241/8000, real env return = -98.57


 91%|█████████████████████████████████▌   | 7251/8000 [2:21:27<13:59,  1.12s/it]

Episode 7251/8000, real env return = -98.55


 91%|█████████████████████████████████▌   | 7261/8000 [2:21:39<14:15,  1.16s/it]

Episode 7261/8000, real env return = -97.94


 91%|█████████████████████████████████▋   | 7271/8000 [2:21:50<14:14,  1.17s/it]

Episode 7271/8000, real env return = -98.34


 91%|█████████████████████████████████▋   | 7281/8000 [2:22:02<14:16,  1.19s/it]

Episode 7281/8000, real env return = -98.44


 91%|█████████████████████████████████▋   | 7291/8000 [2:22:14<13:13,  1.12s/it]

Episode 7291/8000, real env return = -98.69


 91%|█████████████████████████████████▊   | 7301/8000 [2:22:26<14:20,  1.23s/it]

Episode 7301/8000, real env return = -97.14


 91%|█████████████████████████████████▊   | 7311/8000 [2:22:37<12:58,  1.13s/it]

Episode 7311/8000, real env return = -97.67


 92%|█████████████████████████████████▊   | 7321/8000 [2:22:49<13:52,  1.23s/it]

Episode 7321/8000, real env return = -98.02


 92%|█████████████████████████████████▉   | 7331/8000 [2:23:00<12:51,  1.15s/it]

Episode 7331/8000, real env return = -98.14


 92%|█████████████████████████████████▉   | 7341/8000 [2:23:12<12:17,  1.12s/it]

Episode 7341/8000, real env return = -98.20


 92%|█████████████████████████████████▉   | 7351/8000 [2:23:23<12:33,  1.16s/it]

Episode 7351/8000, real env return = -98.49


 92%|██████████████████████████████████   | 7361/8000 [2:23:35<11:52,  1.12s/it]

Episode 7361/8000, real env return = -99.94


 92%|██████████████████████████████████   | 7371/8000 [2:23:46<12:02,  1.15s/it]

Episode 7371/8000, real env return = -98.60


 92%|██████████████████████████████████▏  | 7381/8000 [2:23:58<11:53,  1.15s/it]

Episode 7381/8000, real env return = -98.23


 92%|██████████████████████████████████▏  | 7391/8000 [2:24:09<11:44,  1.16s/it]

Episode 7391/8000, real env return = -98.43


 93%|██████████████████████████████████▏  | 7401/8000 [2:24:21<11:36,  1.16s/it]

Episode 7401/8000, real env return = -98.07


 93%|██████████████████████████████████▎  | 7411/8000 [2:24:33<11:46,  1.20s/it]

Episode 7411/8000, real env return = -98.39


 93%|██████████████████████████████████▎  | 7421/8000 [2:24:44<11:09,  1.16s/it]

Episode 7421/8000, real env return = -98.67


 93%|██████████████████████████████████▎  | 7431/8000 [2:24:56<11:05,  1.17s/it]

Episode 7431/8000, real env return = -99.17


 93%|██████████████████████████████████▍  | 7441/8000 [2:25:08<10:56,  1.17s/it]

Episode 7441/8000, real env return = -98.85


 93%|██████████████████████████████████▍  | 7451/8000 [2:25:19<10:15,  1.12s/it]

Episode 7451/8000, real env return = -97.51


 93%|██████████████████████████████████▌  | 7461/8000 [2:25:31<10:40,  1.19s/it]

Episode 7461/8000, real env return = -97.93


 93%|██████████████████████████████████▌  | 7471/8000 [2:25:42<09:50,  1.12s/it]

Episode 7471/8000, real env return = -98.12


 94%|██████████████████████████████████▌  | 7481/8000 [2:25:54<10:04,  1.16s/it]

Episode 7481/8000, real env return = -98.56


 94%|██████████████████████████████████▋  | 7491/8000 [2:26:05<10:01,  1.18s/it]

Episode 7491/8000, real env return = -98.46


 94%|██████████████████████████████████▋  | 7501/8000 [2:26:16<08:55,  1.07s/it]

Episode 7501/8000, real env return = -98.81


 94%|██████████████████████████████████▋  | 7511/8000 [2:26:28<09:22,  1.15s/it]

Episode 7511/8000, real env return = -98.09


 94%|██████████████████████████████████▊  | 7521/8000 [2:26:39<09:13,  1.16s/it]

Episode 7521/8000, real env return = -98.51


 94%|██████████████████████████████████▊  | 7531/8000 [2:26:51<09:05,  1.16s/it]

Episode 7531/8000, real env return = -101.20


 94%|██████████████████████████████████▉  | 7541/8000 [2:27:03<09:03,  1.18s/it]

Episode 7541/8000, real env return = -98.83


 94%|██████████████████████████████████▉  | 7551/8000 [2:27:15<08:50,  1.18s/it]

Episode 7551/8000, real env return = -97.72


 95%|██████████████████████████████████▉  | 7561/8000 [2:27:26<07:58,  1.09s/it]

Episode 7561/8000, real env return = -100.36


 95%|███████████████████████████████████  | 7571/8000 [2:27:38<08:25,  1.18s/it]

Episode 7571/8000, real env return = -99.85


 95%|███████████████████████████████████  | 7581/8000 [2:27:49<07:58,  1.14s/it]

Episode 7581/8000, real env return = -98.65


 95%|███████████████████████████████████  | 7591/8000 [2:28:01<07:50,  1.15s/it]

Episode 7591/8000, real env return = -98.83


 95%|███████████████████████████████████▏ | 7601/8000 [2:28:13<07:52,  1.18s/it]

Episode 7601/8000, real env return = -97.91


 95%|███████████████████████████████████▏ | 7611/8000 [2:28:24<07:12,  1.11s/it]

Episode 7611/8000, real env return = -98.82


 95%|███████████████████████████████████▏ | 7621/8000 [2:28:36<07:05,  1.12s/it]

Episode 7621/8000, real env return = -100.85


 95%|███████████████████████████████████▎ | 7631/8000 [2:28:47<07:07,  1.16s/it]

Episode 7631/8000, real env return = -99.95


 96%|███████████████████████████████████▎ | 7641/8000 [2:28:59<06:49,  1.14s/it]

Episode 7641/8000, real env return = -101.10


 96%|███████████████████████████████████▍ | 7651/8000 [2:29:11<06:52,  1.18s/it]

Episode 7651/8000, real env return = -99.21


 96%|███████████████████████████████████▍ | 7661/8000 [2:29:22<06:43,  1.19s/it]

Episode 7661/8000, real env return = -98.44


 96%|███████████████████████████████████▍ | 7671/8000 [2:29:33<06:01,  1.10s/it]

Episode 7671/8000, real env return = -99.75


 96%|███████████████████████████████████▌ | 7681/8000 [2:29:45<06:08,  1.15s/it]

Episode 7681/8000, real env return = -99.70


 96%|███████████████████████████████████▌ | 7691/8000 [2:29:56<05:51,  1.14s/it]

Episode 7691/8000, real env return = -99.83


 96%|███████████████████████████████████▌ | 7701/8000 [2:30:08<05:51,  1.18s/it]

Episode 7701/8000, real env return = -100.12


 96%|███████████████████████████████████▋ | 7711/8000 [2:30:20<05:48,  1.21s/it]

Episode 7711/8000, real env return = -99.58


 97%|███████████████████████████████████▋ | 7721/8000 [2:30:32<05:13,  1.12s/it]

Episode 7721/8000, real env return = -99.75


 97%|███████████████████████████████████▊ | 7731/8000 [2:30:43<05:01,  1.12s/it]

Episode 7731/8000, real env return = -100.34


 97%|███████████████████████████████████▊ | 7741/8000 [2:30:55<05:13,  1.21s/it]

Episode 7741/8000, real env return = -99.51


 97%|███████████████████████████████████▊ | 7751/8000 [2:31:06<04:27,  1.07s/it]

Episode 7751/8000, real env return = -100.30


 97%|███████████████████████████████████▉ | 7761/8000 [2:31:18<04:52,  1.23s/it]

Episode 7761/8000, real env return = -99.24


 97%|███████████████████████████████████▉ | 7771/8000 [2:31:30<04:24,  1.16s/it]

Episode 7771/8000, real env return = -116.59


 97%|███████████████████████████████████▉ | 7781/8000 [2:31:41<04:22,  1.20s/it]

Episode 7781/8000, real env return = -101.25


 97%|████████████████████████████████████ | 7791/8000 [2:31:53<04:04,  1.17s/it]

Episode 7791/8000, real env return = -99.58


 98%|████████████████████████████████████ | 7801/8000 [2:32:04<03:54,  1.18s/it]

Episode 7801/8000, real env return = -98.24


 98%|████████████████████████████████████▏| 7811/8000 [2:32:16<03:29,  1.11s/it]

Episode 7811/8000, real env return = -98.75


 98%|████████████████████████████████████▏| 7821/8000 [2:32:28<03:29,  1.17s/it]

Episode 7821/8000, real env return = -99.00


 98%|████████████████████████████████████▏| 7831/8000 [2:32:39<03:13,  1.14s/it]

Episode 7831/8000, real env return = -100.17


 98%|████████████████████████████████████▎| 7841/8000 [2:32:51<03:03,  1.15s/it]

Episode 7841/8000, real env return = -98.67


 98%|████████████████████████████████████▎| 7851/8000 [2:33:03<03:06,  1.25s/it]

Episode 7851/8000, real env return = -97.03


 98%|████████████████████████████████████▎| 7861/8000 [2:33:14<02:32,  1.10s/it]

Episode 7861/8000, real env return = -100.43


 98%|████████████████████████████████████▍| 7871/8000 [2:33:26<02:33,  1.19s/it]

Episode 7871/8000, real env return = -100.34


 99%|████████████████████████████████████▍| 7881/8000 [2:33:37<02:12,  1.12s/it]

Episode 7881/8000, real env return = -100.30


 99%|████████████████████████████████████▍| 7891/8000 [2:33:49<02:09,  1.19s/it]

Episode 7891/8000, real env return = -99.94


 99%|████████████████████████████████████▌| 7901/8000 [2:34:01<01:57,  1.18s/it]

Episode 7901/8000, real env return = -98.23


 99%|████████████████████████████████████▌| 7911/8000 [2:34:12<01:41,  1.14s/it]

Episode 7911/8000, real env return = -100.60


 99%|████████████████████████████████████▋| 7921/8000 [2:34:24<01:28,  1.12s/it]

Episode 7921/8000, real env return = -100.12


 99%|████████████████████████████████████▋| 7931/8000 [2:34:35<01:21,  1.17s/it]

Episode 7931/8000, real env return = -100.02


 99%|████████████████████████████████████▋| 7941/8000 [2:34:47<01:07,  1.14s/it]

Episode 7941/8000, real env return = -99.80


 99%|████████████████████████████████████▊| 7951/8000 [2:34:59<00:55,  1.13s/it]

Episode 7951/8000, real env return = -99.84


100%|████████████████████████████████████▊| 7961/8000 [2:35:11<00:47,  1.21s/it]

Episode 7961/8000, real env return = -105.68


100%|████████████████████████████████████▊| 7971/8000 [2:35:22<00:33,  1.16s/it]

Episode 7971/8000, real env return = -100.61


100%|████████████████████████████████████▉| 7981/8000 [2:35:34<00:23,  1.23s/it]

Episode 7981/8000, real env return = -100.15


100%|████████████████████████████████████▉| 7991/8000 [2:35:46<00:10,  1.18s/it]

Episode 7991/8000, real env return = -100.17


100%|█████████████████████████████████████| 8000/8000 [2:35:56<00:00,  1.17s/it]

Training finished.





In [4]:
train2 = main()

Using cuda device


  0%|                                                  | 0/8000 [00:00<?, ?it/s]

Episode 1/8000, real env return = -104.69


  0%|                                       | 11/8000 [00:08<1:58:06,  1.13it/s]

Episode 11/8000, real env return = -111.12


  0%|                                       | 21/8000 [00:21<2:20:26,  1.06s/it]

Episode 21/8000, real env return = -103.09


  0%|▏                                      | 31/8000 [00:35<2:58:01,  1.34s/it]

Episode 31/8000, real env return = -107.93


  1%|▏                                      | 41/8000 [00:52<3:47:06,  1.71s/it]

Episode 41/8000, real env return = -76.62


  1%|▏                                      | 51/8000 [01:14<4:39:02,  2.11s/it]

Episode 51/8000, real env return = -62.77


  1%|▎                                      | 61/8000 [01:29<2:38:03,  1.19s/it]

Episode 61/8000, real env return = -125.50


  1%|▎                                      | 71/8000 [01:41<2:41:56,  1.23s/it]

Episode 71/8000, real env return = -125.67


  1%|▍                                      | 81/8000 [02:00<3:58:49,  1.81s/it]

Episode 81/8000, real env return = -88.30


  1%|▍                                      | 91/8000 [02:16<3:03:24,  1.39s/it]

Episode 91/8000, real env return = -125.34


  1%|▍                                     | 101/8000 [02:31<2:40:23,  1.22s/it]

Episode 101/8000, real env return = -106.38


  1%|▌                                     | 111/8000 [02:43<2:48:42,  1.28s/it]

Episode 111/8000, real env return = -115.94


  2%|▌                                     | 121/8000 [02:53<2:20:20,  1.07s/it]

Episode 121/8000, real env return = -104.18


  2%|▌                                     | 131/8000 [03:03<1:57:57,  1.11it/s]

Episode 131/8000, real env return = -115.68


  2%|▋                                     | 141/8000 [03:12<1:51:28,  1.17it/s]

Episode 141/8000, real env return = -111.12


  2%|▋                                     | 151/8000 [03:27<3:08:45,  1.44s/it]

Episode 151/8000, real env return = -100.97


  2%|▊                                     | 161/8000 [03:45<4:25:25,  2.03s/it]

Episode 161/8000, real env return = -117.67


  2%|▊                                     | 171/8000 [03:55<2:12:37,  1.02s/it]

Episode 171/8000, real env return = -116.68


  2%|▊                                     | 181/8000 [04:06<2:10:42,  1.00s/it]

Episode 181/8000, real env return = -124.34


  2%|▉                                     | 191/8000 [04:16<2:05:29,  1.04it/s]

Episode 191/8000, real env return = -107.78


  3%|▉                                     | 201/8000 [04:28<3:15:24,  1.50s/it]

Episode 201/8000, real env return = -85.11


  3%|█                                     | 211/8000 [04:45<2:55:13,  1.35s/it]

Episode 211/8000, real env return = -127.39


  3%|█                                     | 221/8000 [04:55<2:23:33,  1.11s/it]

Episode 221/8000, real env return = -116.05


  3%|█                                     | 231/8000 [05:05<1:59:58,  1.08it/s]

Episode 231/8000, real env return = -104.34


  3%|█▏                                    | 241/8000 [05:19<2:40:11,  1.24s/it]

Episode 241/8000, real env return = -110.66


  3%|█▏                                    | 251/8000 [05:28<1:56:34,  1.11it/s]

Episode 251/8000, real env return = -100.23


  3%|█▏                                    | 261/8000 [05:40<2:01:47,  1.06it/s]

Episode 261/8000, real env return = -102.74


  3%|█▎                                    | 271/8000 [05:54<2:55:11,  1.36s/it]

Episode 271/8000, real env return = -112.19


  4%|█▎                                    | 281/8000 [06:03<2:01:38,  1.06it/s]

Episode 281/8000, real env return = -115.05


  4%|█▍                                    | 291/8000 [06:11<1:51:26,  1.15it/s]

Episode 291/8000, real env return = -102.94


  4%|█▍                                    | 301/8000 [06:20<1:48:57,  1.18it/s]

Episode 301/8000, real env return = -102.56


  4%|█▍                                    | 311/8000 [06:28<1:47:32,  1.19it/s]

Episode 311/8000, real env return = -103.01


  4%|█▌                                    | 321/8000 [06:37<1:45:59,  1.21it/s]

Episode 321/8000, real env return = -103.68


  4%|█▌                                    | 331/8000 [06:45<1:47:21,  1.19it/s]

Episode 331/8000, real env return = -103.81


  4%|█▌                                    | 341/8000 [06:54<1:50:04,  1.16it/s]

Episode 341/8000, real env return = -102.44


  4%|█▋                                    | 351/8000 [07:03<1:53:16,  1.13it/s]

Episode 351/8000, real env return = -103.11


  5%|█▋                                    | 361/8000 [07:11<1:52:45,  1.13it/s]

Episode 361/8000, real env return = -101.84


  5%|█▊                                    | 371/8000 [07:20<1:51:14,  1.14it/s]

Episode 371/8000, real env return = -100.15


  5%|█▊                                    | 381/8000 [07:28<1:48:06,  1.17it/s]

Episode 381/8000, real env return = -102.58


  5%|█▊                                    | 391/8000 [07:37<1:50:47,  1.14it/s]

Episode 391/8000, real env return = -102.40


  5%|█▉                                    | 401/8000 [07:46<1:46:14,  1.19it/s]

Episode 401/8000, real env return = -101.80


  5%|█▉                                    | 411/8000 [07:54<1:47:22,  1.18it/s]

Episode 411/8000, real env return = -104.90


  5%|█▉                                    | 421/8000 [08:03<1:47:52,  1.17it/s]

Episode 421/8000, real env return = -100.51


  5%|██                                    | 431/8000 [08:11<1:50:34,  1.14it/s]

Episode 431/8000, real env return = -110.35


  6%|██                                    | 441/8000 [08:20<1:50:36,  1.14it/s]

Episode 441/8000, real env return = -104.38


  6%|██▏                                   | 451/8000 [08:29<1:49:38,  1.15it/s]

Episode 451/8000, real env return = -105.42


  6%|██▏                                   | 461/8000 [08:38<1:51:25,  1.13it/s]

Episode 461/8000, real env return = -117.59


  6%|██▏                                   | 471/8000 [08:47<1:53:23,  1.11it/s]

Episode 471/8000, real env return = -113.53


  6%|██▎                                   | 481/8000 [08:55<1:47:48,  1.16it/s]

Episode 481/8000, real env return = -114.99


  6%|██▎                                   | 491/8000 [09:04<1:48:04,  1.16it/s]

Episode 491/8000, real env return = -103.59


  6%|██▍                                   | 501/8000 [09:13<1:49:06,  1.15it/s]

Episode 501/8000, real env return = -102.41


  6%|██▍                                   | 511/8000 [09:21<1:45:51,  1.18it/s]

Episode 511/8000, real env return = -102.66


  7%|██▍                                   | 521/8000 [09:30<1:50:51,  1.12it/s]

Episode 521/8000, real env return = -101.06


  7%|██▌                                   | 531/8000 [09:38<1:46:19,  1.17it/s]

Episode 531/8000, real env return = -99.63


  7%|██▌                                   | 541/8000 [09:47<1:45:20,  1.18it/s]

Episode 541/8000, real env return = -103.04


  7%|██▌                                   | 551/8000 [09:55<1:44:46,  1.18it/s]

Episode 551/8000, real env return = -101.92


  7%|██▋                                   | 561/8000 [10:04<1:45:12,  1.18it/s]

Episode 561/8000, real env return = -102.08


  7%|██▋                                   | 571/8000 [10:13<1:48:15,  1.14it/s]

Episode 571/8000, real env return = -103.61


  7%|██▊                                   | 581/8000 [10:21<1:44:06,  1.19it/s]

Episode 581/8000, real env return = -102.64


  7%|██▊                                   | 591/8000 [10:30<1:44:11,  1.19it/s]

Episode 591/8000, real env return = -102.56


  8%|██▊                                   | 601/8000 [10:38<1:47:38,  1.15it/s]

Episode 601/8000, real env return = -103.45


  8%|██▉                                   | 611/8000 [10:47<1:43:14,  1.19it/s]

Episode 611/8000, real env return = -102.14


  8%|██▉                                   | 621/8000 [10:56<1:49:10,  1.13it/s]

Episode 621/8000, real env return = -101.88


  8%|██▉                                   | 631/8000 [11:04<1:46:56,  1.15it/s]

Episode 631/8000, real env return = -102.20


  8%|███                                   | 641/8000 [11:13<1:43:20,  1.19it/s]

Episode 641/8000, real env return = -102.56


  8%|███                                   | 651/8000 [11:21<1:47:55,  1.13it/s]

Episode 651/8000, real env return = -102.91


  8%|███▏                                  | 661/8000 [11:30<1:44:47,  1.17it/s]

Episode 661/8000, real env return = -101.91


  8%|███▏                                  | 671/8000 [11:38<1:45:38,  1.16it/s]

Episode 671/8000, real env return = -103.24


  9%|███▏                                  | 681/8000 [11:47<1:42:43,  1.19it/s]

Episode 681/8000, real env return = -104.21


  9%|███▎                                  | 691/8000 [11:56<1:44:22,  1.17it/s]

Episode 691/8000, real env return = -103.00


  9%|███▎                                  | 701/8000 [12:04<1:46:55,  1.14it/s]

Episode 701/8000, real env return = -103.30


  9%|███▍                                  | 711/8000 [12:13<1:47:27,  1.13it/s]

Episode 711/8000, real env return = -103.47


  9%|███▍                                  | 721/8000 [12:22<1:44:02,  1.17it/s]

Episode 721/8000, real env return = -104.11


  9%|███▍                                  | 731/8000 [12:30<1:44:19,  1.16it/s]

Episode 731/8000, real env return = -104.27


  9%|███▌                                  | 741/8000 [12:39<1:42:20,  1.18it/s]

Episode 741/8000, real env return = -103.73


  9%|███▌                                  | 751/8000 [12:48<1:48:56,  1.11it/s]

Episode 751/8000, real env return = -102.72


 10%|███▌                                  | 761/8000 [12:56<1:43:32,  1.17it/s]

Episode 761/8000, real env return = -103.30


 10%|███▋                                  | 771/8000 [13:05<1:41:45,  1.18it/s]

Episode 771/8000, real env return = -103.52


 10%|███▋                                  | 781/8000 [13:13<1:46:59,  1.12it/s]

Episode 781/8000, real env return = -103.07


 10%|███▊                                  | 791/8000 [13:22<1:46:38,  1.13it/s]

Episode 791/8000, real env return = -101.73


 10%|███▊                                  | 801/8000 [13:31<1:42:22,  1.17it/s]

Episode 801/8000, real env return = -102.65


 10%|███▊                                  | 811/8000 [13:39<1:40:35,  1.19it/s]

Episode 811/8000, real env return = -102.99


 10%|███▉                                  | 821/8000 [13:48<1:40:50,  1.19it/s]

Episode 821/8000, real env return = -102.95


 10%|███▉                                  | 831/8000 [13:56<1:45:00,  1.14it/s]

Episode 831/8000, real env return = -102.95


 11%|███▉                                  | 841/8000 [14:05<1:45:19,  1.13it/s]

Episode 841/8000, real env return = -103.34


 11%|████                                  | 851/8000 [14:14<1:41:59,  1.17it/s]

Episode 851/8000, real env return = -99.60


 11%|████                                  | 861/8000 [14:22<1:44:11,  1.14it/s]

Episode 861/8000, real env return = -103.42


 11%|████▏                                 | 871/8000 [14:31<1:42:06,  1.16it/s]

Episode 871/8000, real env return = -113.25


 11%|████▏                                 | 881/8000 [14:39<1:42:20,  1.16it/s]

Episode 881/8000, real env return = -101.56


 11%|████▏                                 | 891/8000 [14:48<1:40:52,  1.17it/s]

Episode 891/8000, real env return = -103.48


 11%|████▎                                 | 901/8000 [14:57<1:40:37,  1.18it/s]

Episode 901/8000, real env return = -103.50


 11%|████▎                                 | 911/8000 [15:05<1:38:11,  1.20it/s]

Episode 911/8000, real env return = -103.34


 12%|████▎                                 | 921/8000 [15:14<1:43:02,  1.14it/s]

Episode 921/8000, real env return = -103.86


 12%|████▍                                 | 931/8000 [15:22<1:40:23,  1.17it/s]

Episode 931/8000, real env return = -104.34


 12%|████▍                                 | 941/8000 [15:31<1:42:42,  1.15it/s]

Episode 941/8000, real env return = -101.73


 12%|████▌                                 | 951/8000 [15:40<1:43:47,  1.13it/s]

Episode 951/8000, real env return = -112.85


 12%|████▌                                 | 961/8000 [15:48<1:39:30,  1.18it/s]

Episode 961/8000, real env return = -109.11


 12%|████▌                                 | 971/8000 [15:57<1:38:49,  1.19it/s]

Episode 971/8000, real env return = -103.71


 12%|████▋                                 | 981/8000 [16:05<1:40:34,  1.16it/s]

Episode 981/8000, real env return = -103.61


 12%|████▋                                 | 991/8000 [16:14<1:45:05,  1.11it/s]

Episode 991/8000, real env return = -110.31


 13%|████▋                                | 1001/8000 [16:23<1:40:59,  1.15it/s]

Episode 1001/8000, real env return = -102.09


 13%|████▋                                | 1011/8000 [16:32<1:39:06,  1.18it/s]

Episode 1011/8000, real env return = -103.13


 13%|████▋                                | 1021/8000 [16:40<1:39:19,  1.17it/s]

Episode 1021/8000, real env return = -103.85


 13%|████▊                                | 1031/8000 [16:49<1:41:23,  1.15it/s]

Episode 1031/8000, real env return = -102.88


 13%|████▊                                | 1041/8000 [16:57<1:40:23,  1.16it/s]

Episode 1041/8000, real env return = -103.63


 13%|████▊                                | 1051/8000 [17:06<1:41:00,  1.15it/s]

Episode 1051/8000, real env return = -101.49


 13%|████▉                                | 1061/8000 [17:15<1:39:55,  1.16it/s]

Episode 1061/8000, real env return = -102.68


 13%|████▉                                | 1071/8000 [17:23<1:42:47,  1.12it/s]

Episode 1071/8000, real env return = -110.74


 14%|████▉                                | 1081/8000 [17:32<1:37:23,  1.18it/s]

Episode 1081/8000, real env return = -109.81


 14%|█████                                | 1091/8000 [17:41<1:42:19,  1.13it/s]

Episode 1091/8000, real env return = -101.86


 14%|█████                                | 1101/8000 [17:49<1:41:10,  1.14it/s]

Episode 1101/8000, real env return = -101.60


 14%|█████▏                               | 1111/8000 [17:58<1:38:32,  1.17it/s]

Episode 1111/8000, real env return = -111.22


 14%|█████▏                               | 1121/8000 [18:07<1:37:28,  1.18it/s]

Episode 1121/8000, real env return = -109.67


 14%|█████▏                               | 1131/8000 [18:15<1:39:21,  1.15it/s]

Episode 1131/8000, real env return = -101.69


 14%|█████▎                               | 1141/8000 [18:24<1:35:02,  1.20it/s]

Episode 1141/8000, real env return = -104.95


 14%|█████▎                               | 1151/8000 [18:32<1:37:21,  1.17it/s]

Episode 1151/8000, real env return = -103.57


 15%|█████▎                               | 1161/8000 [18:41<1:36:10,  1.19it/s]

Episode 1161/8000, real env return = -111.40


 15%|█████▍                               | 1171/8000 [18:50<1:36:48,  1.18it/s]

Episode 1171/8000, real env return = -105.45


 15%|█████▍                               | 1181/8000 [18:58<1:35:29,  1.19it/s]

Episode 1181/8000, real env return = -103.56


 15%|█████▌                               | 1191/8000 [19:07<1:35:24,  1.19it/s]

Episode 1191/8000, real env return = -103.76


 15%|█████▌                               | 1201/8000 [19:15<1:39:52,  1.13it/s]

Episode 1201/8000, real env return = -113.37


 15%|█████▌                               | 1211/8000 [19:24<1:35:27,  1.19it/s]

Episode 1211/8000, real env return = -104.41


 15%|█████▋                               | 1221/8000 [19:32<1:40:25,  1.13it/s]

Episode 1221/8000, real env return = -102.62


 15%|█████▋                               | 1231/8000 [19:41<1:36:00,  1.18it/s]

Episode 1231/8000, real env return = -101.87


 16%|█████▋                               | 1241/8000 [19:50<1:37:12,  1.16it/s]

Episode 1241/8000, real env return = -101.71


 16%|█████▊                               | 1251/8000 [19:58<1:34:29,  1.19it/s]

Episode 1251/8000, real env return = -101.48


 16%|█████▊                               | 1261/8000 [20:07<1:38:52,  1.14it/s]

Episode 1261/8000, real env return = -101.65


 16%|█████▉                               | 1271/8000 [20:15<1:34:23,  1.19it/s]

Episode 1271/8000, real env return = -102.42


 16%|█████▉                               | 1281/8000 [20:24<1:33:53,  1.19it/s]

Episode 1281/8000, real env return = -102.12


 16%|█████▉                               | 1291/8000 [20:32<1:36:43,  1.16it/s]

Episode 1291/8000, real env return = -102.88


 16%|██████                               | 1301/8000 [20:41<1:31:01,  1.23it/s]

Episode 1301/8000, real env return = -102.13


 16%|██████                               | 1311/8000 [20:49<1:32:28,  1.21it/s]

Episode 1311/8000, real env return = -102.28


 17%|██████                               | 1321/8000 [20:58<1:39:19,  1.12it/s]

Episode 1321/8000, real env return = -102.60


 17%|██████▏                              | 1331/8000 [21:06<1:34:45,  1.17it/s]

Episode 1331/8000, real env return = -103.86


 17%|██████▏                              | 1341/8000 [21:15<1:37:57,  1.13it/s]

Episode 1341/8000, real env return = -102.81


 17%|██████▏                              | 1351/8000 [21:24<1:35:46,  1.16it/s]

Episode 1351/8000, real env return = -109.49


 17%|██████▎                              | 1361/8000 [21:33<1:38:47,  1.12it/s]

Episode 1361/8000, real env return = -103.31


 17%|██████▎                              | 1371/8000 [21:41<1:39:01,  1.12it/s]

Episode 1371/8000, real env return = -103.72


 17%|██████▍                              | 1381/8000 [21:50<1:36:36,  1.14it/s]

Episode 1381/8000, real env return = -108.24


 17%|██████▍                              | 1391/8000 [21:59<1:33:25,  1.18it/s]

Episode 1391/8000, real env return = -104.04


 18%|██████▍                              | 1401/8000 [22:07<1:33:10,  1.18it/s]

Episode 1401/8000, real env return = -109.38


 18%|██████▌                              | 1411/8000 [22:16<1:35:32,  1.15it/s]

Episode 1411/8000, real env return = -103.41


 18%|██████▌                              | 1421/8000 [22:24<1:36:12,  1.14it/s]

Episode 1421/8000, real env return = -108.22


 18%|██████▌                              | 1431/8000 [22:33<1:32:48,  1.18it/s]

Episode 1431/8000, real env return = -113.59


 18%|██████▋                              | 1441/8000 [22:41<1:32:08,  1.19it/s]

Episode 1441/8000, real env return = -112.21


 18%|██████▋                              | 1451/8000 [22:50<1:31:07,  1.20it/s]

Episode 1451/8000, real env return = -109.91


 18%|██████▊                              | 1461/8000 [22:59<1:37:12,  1.12it/s]

Episode 1461/8000, real env return = -107.66


 18%|██████▊                              | 1471/8000 [23:07<1:34:33,  1.15it/s]

Episode 1471/8000, real env return = -108.04


 19%|██████▊                              | 1481/8000 [23:16<1:32:15,  1.18it/s]

Episode 1481/8000, real env return = -109.24


 19%|██████▉                              | 1491/8000 [23:24<1:32:02,  1.18it/s]

Episode 1491/8000, real env return = -108.43


 19%|██████▉                              | 1501/8000 [23:32<1:32:20,  1.17it/s]

Episode 1501/8000, real env return = -108.28


 19%|██████▉                              | 1511/8000 [23:41<1:34:59,  1.14it/s]

Episode 1511/8000, real env return = -109.97


 19%|███████                              | 1521/8000 [23:50<1:32:45,  1.16it/s]

Episode 1521/8000, real env return = -109.54


 19%|███████                              | 1531/8000 [23:58<1:32:56,  1.16it/s]

Episode 1531/8000, real env return = -109.67


 19%|███████▏                             | 1541/8000 [24:07<1:32:27,  1.16it/s]

Episode 1541/8000, real env return = -108.35


 19%|███████▏                             | 1551/8000 [24:16<1:44:00,  1.03it/s]

Episode 1551/8000, real env return = -110.62


 20%|███████▏                             | 1561/8000 [24:25<1:39:18,  1.08it/s]

Episode 1561/8000, real env return = -112.54


 20%|███████▎                             | 1571/8000 [24:35<1:37:37,  1.10it/s]

Episode 1571/8000, real env return = -111.52


 20%|███████▎                             | 1581/8000 [24:43<1:32:23,  1.16it/s]

Episode 1581/8000, real env return = -109.20


 20%|███████▎                             | 1591/8000 [24:52<1:32:18,  1.16it/s]

Episode 1591/8000, real env return = -113.19


 20%|███████▍                             | 1601/8000 [25:01<1:31:38,  1.16it/s]

Episode 1601/8000, real env return = -111.30


 20%|███████▍                             | 1611/8000 [25:10<1:35:03,  1.12it/s]

Episode 1611/8000, real env return = -108.19


 20%|███████▍                             | 1621/8000 [25:18<1:29:25,  1.19it/s]

Episode 1621/8000, real env return = -108.50


 20%|███████▌                             | 1631/8000 [25:27<1:32:17,  1.15it/s]

Episode 1631/8000, real env return = -111.80


 21%|███████▌                             | 1641/8000 [25:35<1:34:24,  1.12it/s]

Episode 1641/8000, real env return = -111.78


 21%|███████▋                             | 1651/8000 [25:44<1:30:30,  1.17it/s]

Episode 1651/8000, real env return = -110.12


 21%|███████▋                             | 1661/8000 [25:52<1:28:52,  1.19it/s]

Episode 1661/8000, real env return = -110.11


 21%|███████▋                             | 1671/8000 [26:01<1:30:21,  1.17it/s]

Episode 1671/8000, real env return = -107.71


 21%|███████▊                             | 1681/8000 [26:10<1:29:43,  1.17it/s]

Episode 1681/8000, real env return = -108.09


 21%|███████▊                             | 1691/8000 [26:18<1:28:49,  1.18it/s]

Episode 1691/8000, real env return = -107.98


 21%|███████▊                             | 1701/8000 [26:27<1:33:08,  1.13it/s]

Episode 1701/8000, real env return = -108.81


 21%|███████▉                             | 1711/8000 [26:36<1:28:41,  1.18it/s]

Episode 1711/8000, real env return = -107.58


 22%|███████▉                             | 1721/8000 [26:44<1:27:38,  1.19it/s]

Episode 1721/8000, real env return = -107.90


 22%|████████                             | 1731/8000 [26:53<1:28:14,  1.18it/s]

Episode 1731/8000, real env return = -112.79


 22%|████████                             | 1741/8000 [27:01<1:28:16,  1.18it/s]

Episode 1741/8000, real env return = -112.58


 22%|████████                             | 1751/8000 [27:10<1:27:34,  1.19it/s]

Episode 1751/8000, real env return = -110.89


 22%|████████▏                            | 1761/8000 [27:18<1:30:57,  1.14it/s]

Episode 1761/8000, real env return = -109.06


 22%|████████▏                            | 1771/8000 [27:27<1:33:01,  1.12it/s]

Episode 1771/8000, real env return = -110.37


 22%|████████▏                            | 1781/8000 [27:36<1:28:52,  1.17it/s]

Episode 1781/8000, real env return = -112.33


 22%|████████▎                            | 1791/8000 [27:44<1:28:55,  1.16it/s]

Episode 1791/8000, real env return = -108.73


 23%|████████▎                            | 1801/8000 [27:53<1:27:33,  1.18it/s]

Episode 1801/8000, real env return = -110.29


 23%|████████▍                            | 1811/8000 [28:01<1:29:01,  1.16it/s]

Episode 1811/8000, real env return = -108.21


 23%|████████▍                            | 1821/8000 [28:10<1:29:46,  1.15it/s]

Episode 1821/8000, real env return = -109.52


 23%|████████▍                            | 1831/8000 [28:19<1:30:11,  1.14it/s]

Episode 1831/8000, real env return = -108.66


 23%|████████▌                            | 1841/8000 [28:28<1:29:47,  1.14it/s]

Episode 1841/8000, real env return = -109.65


 23%|████████▌                            | 1851/8000 [28:37<1:30:41,  1.13it/s]

Episode 1851/8000, real env return = -109.83


 23%|████████▌                            | 1861/8000 [28:46<1:31:12,  1.12it/s]

Episode 1861/8000, real env return = -106.23


 23%|████████▋                            | 1871/8000 [28:55<1:29:40,  1.14it/s]

Episode 1871/8000, real env return = -109.26


 24%|████████▋                            | 1881/8000 [29:04<1:33:46,  1.09it/s]

Episode 1881/8000, real env return = -111.12


 24%|████████▋                            | 1891/8000 [29:13<1:31:10,  1.12it/s]

Episode 1891/8000, real env return = -105.33


 24%|████████▊                            | 1901/8000 [29:22<1:29:00,  1.14it/s]

Episode 1901/8000, real env return = -107.75


 24%|████████▊                            | 1911/8000 [29:31<1:31:47,  1.11it/s]

Episode 1911/8000, real env return = -110.91


 24%|████████▉                            | 1921/8000 [29:39<1:27:03,  1.16it/s]

Episode 1921/8000, real env return = -110.55


 24%|████████▉                            | 1931/8000 [29:48<1:31:40,  1.10it/s]

Episode 1931/8000, real env return = -109.08


 24%|████████▉                            | 1941/8000 [29:57<1:27:49,  1.15it/s]

Episode 1941/8000, real env return = -110.76


 24%|█████████                            | 1951/8000 [30:06<1:27:20,  1.15it/s]

Episode 1951/8000, real env return = -110.07


 25%|█████████                            | 1961/8000 [30:14<1:27:09,  1.15it/s]

Episode 1961/8000, real env return = -110.28


 25%|█████████                            | 1971/8000 [30:23<1:24:23,  1.19it/s]

Episode 1971/8000, real env return = -110.46


 25%|█████████▏                           | 1981/8000 [30:32<1:26:24,  1.16it/s]

Episode 1981/8000, real env return = -110.86


 25%|█████████▏                           | 1991/8000 [30:40<1:29:04,  1.12it/s]

Episode 1991/8000, real env return = -111.26


 25%|█████████▎                           | 2001/8000 [30:49<1:25:13,  1.17it/s]

Episode 2001/8000, real env return = -111.91


 25%|█████████▎                           | 2011/8000 [30:58<1:24:31,  1.18it/s]

Episode 2011/8000, real env return = -109.83


 25%|█████████▎                           | 2021/8000 [31:06<1:23:18,  1.20it/s]

Episode 2021/8000, real env return = -109.32


 25%|█████████▍                           | 2031/8000 [31:15<1:24:29,  1.18it/s]

Episode 2031/8000, real env return = -107.17


 26%|█████████▍                           | 2041/8000 [31:24<1:26:51,  1.14it/s]

Episode 2041/8000, real env return = -109.16


 26%|█████████▍                           | 2051/8000 [31:32<1:23:46,  1.18it/s]

Episode 2051/8000, real env return = -111.55


 26%|█████████▌                           | 2061/8000 [31:41<1:24:18,  1.17it/s]

Episode 2061/8000, real env return = -110.73


 26%|█████████▌                           | 2071/8000 [31:50<1:24:14,  1.17it/s]

Episode 2071/8000, real env return = -112.72


 26%|█████████▌                           | 2081/8000 [31:58<1:23:23,  1.18it/s]

Episode 2081/8000, real env return = -110.67


 26%|█████████▋                           | 2091/8000 [32:07<1:26:51,  1.13it/s]

Episode 2091/8000, real env return = -110.34


 26%|█████████▋                           | 2101/8000 [32:15<1:23:09,  1.18it/s]

Episode 2101/8000, real env return = -113.18


 26%|█████████▊                           | 2111/8000 [32:24<1:26:32,  1.13it/s]

Episode 2111/8000, real env return = -111.37


 27%|█████████▊                           | 2121/8000 [32:33<1:27:29,  1.12it/s]

Episode 2121/8000, real env return = -110.58


 27%|█████████▊                           | 2131/8000 [32:41<1:26:30,  1.13it/s]

Episode 2131/8000, real env return = -112.20


 27%|█████████▉                           | 2141/8000 [32:50<1:25:56,  1.14it/s]

Episode 2141/8000, real env return = -111.01


 27%|█████████▉                           | 2151/8000 [32:59<1:23:51,  1.16it/s]

Episode 2151/8000, real env return = -109.25


 27%|█████████▉                           | 2161/8000 [33:08<1:24:34,  1.15it/s]

Episode 2161/8000, real env return = -108.03


 27%|██████████                           | 2171/8000 [33:17<1:27:16,  1.11it/s]

Episode 2171/8000, real env return = -107.63


 27%|██████████                           | 2181/8000 [33:25<1:24:42,  1.14it/s]

Episode 2181/8000, real env return = -108.10


 27%|██████████▏                          | 2191/8000 [33:34<1:22:11,  1.18it/s]

Episode 2191/8000, real env return = -108.33


 28%|██████████▏                          | 2201/8000 [33:43<1:24:53,  1.14it/s]

Episode 2201/8000, real env return = -108.53


 28%|██████████▏                          | 2211/8000 [33:52<1:22:57,  1.16it/s]

Episode 2211/8000, real env return = -108.20


 28%|██████████▎                          | 2221/8000 [34:01<1:23:05,  1.16it/s]

Episode 2221/8000, real env return = -108.85


 28%|██████████▎                          | 2231/8000 [34:09<1:22:56,  1.16it/s]

Episode 2231/8000, real env return = -108.32


 28%|██████████▎                          | 2241/8000 [34:18<1:26:18,  1.11it/s]

Episode 2241/8000, real env return = -109.70


 28%|██████████▍                          | 2251/8000 [34:27<1:25:13,  1.12it/s]

Episode 2251/8000, real env return = -110.19


 28%|██████████▍                          | 2261/8000 [34:35<1:20:44,  1.18it/s]

Episode 2261/8000, real env return = -110.53


 28%|██████████▌                          | 2271/8000 [34:44<1:25:11,  1.12it/s]

Episode 2271/8000, real env return = -110.26


 29%|██████████▌                          | 2281/8000 [34:53<1:24:35,  1.13it/s]

Episode 2281/8000, real env return = -109.57


 29%|██████████▌                          | 2291/8000 [35:02<1:22:05,  1.16it/s]

Episode 2291/8000, real env return = -110.11


 29%|██████████▋                          | 2301/8000 [35:10<1:20:56,  1.17it/s]

Episode 2301/8000, real env return = -109.56


 29%|██████████▋                          | 2311/8000 [35:19<1:25:08,  1.11it/s]

Episode 2311/8000, real env return = -109.67


 29%|██████████▋                          | 2321/8000 [35:28<1:24:16,  1.12it/s]

Episode 2321/8000, real env return = -115.80


 29%|██████████▊                          | 2331/8000 [35:36<1:23:21,  1.13it/s]

Episode 2331/8000, real env return = -109.72


 29%|██████████▊                          | 2341/8000 [35:45<1:20:27,  1.17it/s]

Episode 2341/8000, real env return = -111.50


 29%|██████████▊                          | 2351/8000 [35:54<1:19:57,  1.18it/s]

Episode 2351/8000, real env return = -109.34


 30%|██████████▉                          | 2361/8000 [36:02<1:21:59,  1.15it/s]

Episode 2361/8000, real env return = -109.31


 30%|██████████▉                          | 2371/8000 [36:11<1:18:08,  1.20it/s]

Episode 2371/8000, real env return = -108.60


 30%|███████████                          | 2381/8000 [36:20<1:22:27,  1.14it/s]

Episode 2381/8000, real env return = -108.88


 30%|███████████                          | 2391/8000 [36:28<1:20:57,  1.15it/s]

Episode 2391/8000, real env return = -109.15


 30%|███████████                          | 2401/8000 [36:37<1:26:53,  1.07it/s]

Episode 2401/8000, real env return = -110.77


 30%|███████████▏                         | 2411/8000 [36:46<1:19:55,  1.17it/s]

Episode 2411/8000, real env return = -108.43


 30%|███████████▏                         | 2421/8000 [36:54<1:18:16,  1.19it/s]

Episode 2421/8000, real env return = -108.25


 30%|███████████▏                         | 2431/8000 [37:03<1:18:39,  1.18it/s]

Episode 2431/8000, real env return = -108.32


 31%|███████████▎                         | 2441/8000 [37:12<1:21:13,  1.14it/s]

Episode 2441/8000, real env return = -108.33


 31%|███████████▎                         | 2451/8000 [37:20<1:21:06,  1.14it/s]

Episode 2451/8000, real env return = -109.43


 31%|███████████▍                         | 2461/8000 [37:29<1:20:32,  1.15it/s]

Episode 2461/8000, real env return = -109.57


 31%|███████████▍                         | 2471/8000 [37:38<1:18:32,  1.17it/s]

Episode 2471/8000, real env return = -108.03


 31%|███████████▍                         | 2481/8000 [37:47<1:22:58,  1.11it/s]

Episode 2481/8000, real env return = -108.92


 31%|███████████▌                         | 2491/8000 [37:55<1:18:27,  1.17it/s]

Episode 2491/8000, real env return = -109.30


 31%|███████████▌                         | 2501/8000 [38:04<1:20:57,  1.13it/s]

Episode 2501/8000, real env return = -109.36


 31%|███████████▌                         | 2511/8000 [38:13<1:19:27,  1.15it/s]

Episode 2511/8000, real env return = -108.52


 32%|███████████▋                         | 2521/8000 [38:21<1:15:57,  1.20it/s]

Episode 2521/8000, real env return = -107.97


 32%|███████████▋                         | 2531/8000 [38:30<1:16:06,  1.20it/s]

Episode 2531/8000, real env return = -110.64


 32%|███████████▊                         | 2541/8000 [38:38<1:15:24,  1.21it/s]

Episode 2541/8000, real env return = -109.30


 32%|███████████▊                         | 2551/8000 [38:47<1:18:20,  1.16it/s]

Episode 2551/8000, real env return = -111.89


 32%|███████████▊                         | 2561/8000 [38:55<1:15:39,  1.20it/s]

Episode 2561/8000, real env return = -109.90


 32%|███████████▉                         | 2571/8000 [39:04<1:15:19,  1.20it/s]

Episode 2571/8000, real env return = -103.00


 32%|███████████▉                         | 2581/8000 [39:12<1:19:31,  1.14it/s]

Episode 2581/8000, real env return = -105.19


 32%|███████████▉                         | 2591/8000 [39:21<1:18:15,  1.15it/s]

Episode 2591/8000, real env return = -104.40


 33%|████████████                         | 2601/8000 [39:29<1:16:17,  1.18it/s]

Episode 2601/8000, real env return = -104.49


 33%|████████████                         | 2611/8000 [39:38<1:15:09,  1.19it/s]

Episode 2611/8000, real env return = -107.43


 33%|████████████                         | 2621/8000 [39:46<1:12:44,  1.23it/s]

Episode 2621/8000, real env return = -107.08


 33%|████████████▏                        | 2631/8000 [39:55<1:15:18,  1.19it/s]

Episode 2631/8000, real env return = -107.57


 33%|████████████▏                        | 2641/8000 [40:03<1:19:17,  1.13it/s]

Episode 2641/8000, real env return = -109.43


 33%|████████████▎                        | 2651/8000 [40:12<1:19:06,  1.13it/s]

Episode 2651/8000, real env return = -109.13


 33%|████████████▎                        | 2661/8000 [40:20<1:15:09,  1.18it/s]

Episode 2661/8000, real env return = -109.26


 33%|████████████▎                        | 2671/8000 [40:29<1:14:48,  1.19it/s]

Episode 2671/8000, real env return = -110.84


 34%|████████████▍                        | 2681/8000 [40:37<1:16:27,  1.16it/s]

Episode 2681/8000, real env return = -109.95


 34%|████████████▍                        | 2691/8000 [40:46<1:15:22,  1.17it/s]

Episode 2691/8000, real env return = -109.57


 34%|████████████▍                        | 2701/8000 [40:54<1:15:04,  1.18it/s]

Episode 2701/8000, real env return = -110.11


 34%|████████████▌                        | 2711/8000 [41:03<1:17:04,  1.14it/s]

Episode 2711/8000, real env return = -107.39


 34%|████████████▌                        | 2721/8000 [41:11<1:14:01,  1.19it/s]

Episode 2721/8000, real env return = -108.02


 34%|████████████▋                        | 2731/8000 [41:20<1:16:17,  1.15it/s]

Episode 2731/8000, real env return = -107.54


 34%|████████████▋                        | 2741/8000 [41:29<1:16:25,  1.15it/s]

Episode 2741/8000, real env return = -107.69


 34%|████████████▋                        | 2751/8000 [41:38<1:19:57,  1.09it/s]

Episode 2751/8000, real env return = -109.02


 35%|████████████▊                        | 2761/8000 [41:46<1:16:07,  1.15it/s]

Episode 2761/8000, real env return = -108.07


 35%|████████████▊                        | 2771/8000 [41:55<1:16:30,  1.14it/s]

Episode 2771/8000, real env return = -107.99


 35%|████████████▊                        | 2781/8000 [42:04<1:16:00,  1.14it/s]

Episode 2781/8000, real env return = -100.16


 35%|████████████▉                        | 2791/8000 [42:13<1:15:47,  1.15it/s]

Episode 2791/8000, real env return = -104.35


 35%|████████████▉                        | 2801/8000 [42:22<1:16:11,  1.14it/s]

Episode 2801/8000, real env return = -109.19


 35%|█████████████                        | 2811/8000 [42:30<1:10:20,  1.23it/s]

Episode 2811/8000, real env return = -107.33


 35%|█████████████                        | 2821/8000 [42:38<1:11:59,  1.20it/s]

Episode 2821/8000, real env return = -107.95


 35%|█████████████                        | 2831/8000 [42:47<1:11:59,  1.20it/s]

Episode 2831/8000, real env return = -110.05


 36%|█████████████▏                       | 2841/8000 [42:56<1:15:25,  1.14it/s]

Episode 2841/8000, real env return = -108.07


 36%|█████████████▏                       | 2851/8000 [43:04<1:13:26,  1.17it/s]

Episode 2851/8000, real env return = -109.31


 36%|█████████████▏                       | 2861/8000 [43:12<1:11:19,  1.20it/s]

Episode 2861/8000, real env return = -108.98


 36%|█████████████▎                       | 2871/8000 [43:21<1:12:42,  1.18it/s]

Episode 2871/8000, real env return = -108.69


 36%|█████████████▎                       | 2881/8000 [43:30<1:15:47,  1.13it/s]

Episode 2881/8000, real env return = -108.16


 36%|█████████████▎                       | 2891/8000 [43:39<1:12:17,  1.18it/s]

Episode 2891/8000, real env return = -110.35


 36%|█████████████▍                       | 2901/8000 [43:47<1:12:22,  1.17it/s]

Episode 2901/8000, real env return = -106.95


 36%|█████████████▍                       | 2911/8000 [43:56<1:11:42,  1.18it/s]

Episode 2911/8000, real env return = -110.53


 37%|█████████████▌                       | 2921/8000 [44:04<1:10:36,  1.20it/s]

Episode 2921/8000, real env return = -108.58


 37%|█████████████▌                       | 2931/8000 [44:13<1:11:23,  1.18it/s]

Episode 2931/8000, real env return = -109.58


 37%|█████████████▌                       | 2941/8000 [44:21<1:14:31,  1.13it/s]

Episode 2941/8000, real env return = -108.20


 37%|█████████████▋                       | 2951/8000 [44:30<1:11:37,  1.17it/s]

Episode 2951/8000, real env return = -108.10


 37%|█████████████▋                       | 2961/8000 [44:38<1:10:55,  1.18it/s]

Episode 2961/8000, real env return = -107.48


 37%|█████████████▋                       | 2971/8000 [44:47<1:11:32,  1.17it/s]

Episode 2971/8000, real env return = -107.80


 37%|█████████████▊                       | 2981/8000 [44:56<1:15:10,  1.11it/s]

Episode 2981/8000, real env return = -108.57


 37%|█████████████▊                       | 2991/8000 [45:05<1:11:32,  1.17it/s]

Episode 2991/8000, real env return = -108.60


 38%|█████████████▉                       | 3001/8000 [45:13<1:10:20,  1.18it/s]

Episode 3001/8000, real env return = -107.87


 38%|█████████████▉                       | 3011/8000 [45:22<1:12:02,  1.15it/s]

Episode 3011/8000, real env return = -107.26


 38%|█████████████▉                       | 3021/8000 [45:31<1:12:18,  1.15it/s]

Episode 3021/8000, real env return = -108.68


 38%|██████████████                       | 3031/8000 [45:40<1:13:30,  1.13it/s]

Episode 3031/8000, real env return = -107.07


 38%|██████████████                       | 3041/8000 [45:49<1:12:14,  1.14it/s]

Episode 3041/8000, real env return = -108.55


 38%|██████████████                       | 3051/8000 [45:58<1:13:44,  1.12it/s]

Episode 3051/8000, real env return = -107.78


 38%|██████████████▏                      | 3061/8000 [46:07<1:13:32,  1.12it/s]

Episode 3061/8000, real env return = -111.46


 38%|██████████████▏                      | 3071/8000 [46:16<1:13:11,  1.12it/s]

Episode 3071/8000, real env return = -110.85


 39%|██████████████▏                      | 3081/8000 [46:25<1:14:06,  1.11it/s]

Episode 3081/8000, real env return = -109.25


 39%|██████████████▎                      | 3091/8000 [46:34<1:19:14,  1.03it/s]

Episode 3091/8000, real env return = -107.45


 39%|██████████████▎                      | 3101/8000 [46:42<1:09:39,  1.17it/s]

Episode 3101/8000, real env return = -108.10


 39%|██████████████▍                      | 3111/8000 [46:51<1:07:40,  1.20it/s]

Episode 3111/8000, real env return = -107.56


 39%|██████████████▍                      | 3121/8000 [47:00<1:09:16,  1.17it/s]

Episode 3121/8000, real env return = -105.96


 39%|██████████████▍                      | 3131/8000 [47:09<1:17:25,  1.05it/s]

Episode 3131/8000, real env return = -101.37


 39%|██████████████▌                      | 3141/8000 [47:18<1:14:12,  1.09it/s]

Episode 3141/8000, real env return = -102.74


 39%|██████████████▌                      | 3151/8000 [47:26<1:10:58,  1.14it/s]

Episode 3151/8000, real env return = -101.75


 40%|██████████████▌                      | 3161/8000 [47:35<1:11:04,  1.13it/s]

Episode 3161/8000, real env return = -106.99


 40%|██████████████▋                      | 3171/8000 [47:44<1:12:52,  1.10it/s]

Episode 3171/8000, real env return = -107.71


 40%|██████████████▋                      | 3181/8000 [47:53<1:08:56,  1.17it/s]

Episode 3181/8000, real env return = -106.98


 40%|██████████████▊                      | 3191/8000 [48:02<1:07:32,  1.19it/s]

Episode 3191/8000, real env return = -106.86


 40%|██████████████▊                      | 3201/8000 [48:10<1:09:16,  1.15it/s]

Episode 3201/8000, real env return = -107.72


 40%|██████████████▊                      | 3211/8000 [48:19<1:09:42,  1.15it/s]

Episode 3211/8000, real env return = -107.42


 40%|██████████████▉                      | 3221/8000 [48:28<1:11:41,  1.11it/s]

Episode 3221/8000, real env return = -107.49


 40%|██████████████▉                      | 3231/8000 [48:37<1:12:07,  1.10it/s]

Episode 3231/8000, real env return = -107.30


 41%|██████████████▉                      | 3241/8000 [48:46<1:11:55,  1.10it/s]

Episode 3241/8000, real env return = -106.88


 41%|███████████████                      | 3251/8000 [48:55<1:10:35,  1.12it/s]

Episode 3251/8000, real env return = -107.87


 41%|███████████████                      | 3261/8000 [49:05<1:15:29,  1.05it/s]

Episode 3261/8000, real env return = -107.95


 41%|███████████████▏                     | 3271/8000 [49:13<1:12:10,  1.09it/s]

Episode 3271/8000, real env return = -106.29


 41%|███████████████▏                     | 3281/8000 [49:22<1:11:34,  1.10it/s]

Episode 3281/8000, real env return = -106.38


 41%|███████████████▏                     | 3291/8000 [49:31<1:09:00,  1.14it/s]

Episode 3291/8000, real env return = -107.54


 41%|███████████████▎                     | 3301/8000 [49:40<1:09:39,  1.12it/s]

Episode 3301/8000, real env return = -106.88


 41%|███████████████▎                     | 3311/8000 [49:49<1:07:54,  1.15it/s]

Episode 3311/8000, real env return = -107.91


 42%|███████████████▎                     | 3321/8000 [49:57<1:09:38,  1.12it/s]

Episode 3321/8000, real env return = -107.21


 42%|███████████████▍                     | 3331/8000 [50:06<1:08:22,  1.14it/s]

Episode 3331/8000, real env return = -107.33


 42%|███████████████▍                     | 3341/8000 [50:15<1:07:24,  1.15it/s]

Episode 3341/8000, real env return = -107.92


 42%|███████████████▍                     | 3351/8000 [50:24<1:07:56,  1.14it/s]

Episode 3351/8000, real env return = -99.93


 42%|███████████████▌                     | 3361/8000 [50:32<1:08:49,  1.12it/s]

Episode 3361/8000, real env return = -102.01


 42%|███████████████▌                     | 3371/8000 [50:41<1:05:36,  1.18it/s]

Episode 3371/8000, real env return = -101.74


 42%|███████████████▋                     | 3381/8000 [50:50<1:05:01,  1.18it/s]

Episode 3381/8000, real env return = -99.67


 42%|███████████████▋                     | 3391/8000 [50:58<1:05:30,  1.17it/s]

Episode 3391/8000, real env return = -99.71


 43%|███████████████▋                     | 3401/8000 [51:07<1:05:38,  1.17it/s]

Episode 3401/8000, real env return = -101.20


 43%|███████████████▊                     | 3411/8000 [51:16<1:08:43,  1.11it/s]

Episode 3411/8000, real env return = -101.67


 43%|███████████████▊                     | 3421/8000 [51:25<1:05:19,  1.17it/s]

Episode 3421/8000, real env return = -101.62


 43%|███████████████▊                     | 3431/8000 [51:33<1:05:01,  1.17it/s]

Episode 3431/8000, real env return = -100.58


 43%|███████████████▉                     | 3441/8000 [51:42<1:05:03,  1.17it/s]

Episode 3441/8000, real env return = -102.00


 43%|███████████████▉                     | 3451/8000 [51:51<1:04:02,  1.18it/s]

Episode 3451/8000, real env return = -101.52


 43%|████████████████                     | 3461/8000 [51:59<1:04:29,  1.17it/s]

Episode 3461/8000, real env return = -101.51


 43%|████████████████                     | 3471/8000 [52:08<1:03:38,  1.19it/s]

Episode 3471/8000, real env return = -101.74


 44%|████████████████                     | 3481/8000 [52:16<1:06:04,  1.14it/s]

Episode 3481/8000, real env return = -101.90


 44%|████████████████▏                    | 3491/8000 [52:25<1:05:50,  1.14it/s]

Episode 3491/8000, real env return = -101.75


 44%|████████████████▏                    | 3501/8000 [52:33<1:03:27,  1.18it/s]

Episode 3501/8000, real env return = -101.32


 44%|████████████████▏                    | 3511/8000 [52:42<1:03:07,  1.19it/s]

Episode 3511/8000, real env return = -101.56


 44%|████████████████▎                    | 3521/8000 [52:51<1:02:50,  1.19it/s]

Episode 3521/8000, real env return = -102.02


 44%|████████████████▎                    | 3531/8000 [52:59<1:04:28,  1.16it/s]

Episode 3531/8000, real env return = -101.85


 44%|████████████████▍                    | 3541/8000 [53:08<1:05:53,  1.13it/s]

Episode 3541/8000, real env return = -101.52


 44%|████████████████▍                    | 3551/8000 [53:16<1:06:36,  1.11it/s]

Episode 3551/8000, real env return = -101.82


 45%|████████████████▍                    | 3561/8000 [53:25<1:05:43,  1.13it/s]

Episode 3561/8000, real env return = -101.93


 45%|████████████████▌                    | 3571/8000 [53:34<1:04:18,  1.15it/s]

Episode 3571/8000, real env return = -101.70


 45%|████████████████▌                    | 3581/8000 [53:42<1:04:21,  1.14it/s]

Episode 3581/8000, real env return = -101.52


 45%|████████████████▌                    | 3591/8000 [53:51<1:05:33,  1.12it/s]

Episode 3591/8000, real env return = -101.66


 45%|████████████████▋                    | 3601/8000 [54:00<1:08:18,  1.07it/s]

Episode 3601/8000, real env return = -101.58


 45%|████████████████▋                    | 3611/8000 [54:10<1:07:19,  1.09it/s]

Episode 3611/8000, real env return = -100.54


 45%|████████████████▋                    | 3621/8000 [54:19<1:05:06,  1.12it/s]

Episode 3621/8000, real env return = -101.75


 45%|████████████████▊                    | 3631/8000 [54:28<1:02:35,  1.16it/s]

Episode 3631/8000, real env return = -101.43


 46%|████████████████▊                    | 3641/8000 [54:36<1:03:10,  1.15it/s]

Episode 3641/8000, real env return = -101.72


 46%|████████████████▉                    | 3651/8000 [54:45<1:03:35,  1.14it/s]

Episode 3651/8000, real env return = -101.55


 46%|████████████████▉                    | 3661/8000 [54:53<1:01:14,  1.18it/s]

Episode 3661/8000, real env return = -101.36


 46%|█████████████████▉                     | 3671/8000 [55:02<59:57,  1.20it/s]

Episode 3671/8000, real env return = -105.85


 46%|█████████████████                    | 3681/8000 [55:11<1:01:39,  1.17it/s]

Episode 3681/8000, real env return = -101.39


 46%|█████████████████                    | 3691/8000 [55:19<1:01:09,  1.17it/s]

Episode 3691/8000, real env return = -101.64


 46%|█████████████████                    | 3701/8000 [55:28<1:01:15,  1.17it/s]

Episode 3701/8000, real env return = -101.40


 46%|█████████████████▏                   | 3711/8000 [55:36<1:03:17,  1.13it/s]

Episode 3711/8000, real env return = -101.83


 47%|█████████████████▏                   | 3721/8000 [55:45<1:00:23,  1.18it/s]

Episode 3721/8000, real env return = -101.36


 47%|█████████████████▎                   | 3731/8000 [55:53<1:02:37,  1.14it/s]

Episode 3731/8000, real env return = -101.63


 47%|█████████████████▎                   | 3741/8000 [56:02<1:02:18,  1.14it/s]

Episode 3741/8000, real env return = -101.80


 47%|█████████████████▎                   | 3751/8000 [56:11<1:06:15,  1.07it/s]

Episode 3751/8000, real env return = -101.01


 47%|██████████████████▎                    | 3761/8000 [56:20<59:49,  1.18it/s]

Episode 3761/8000, real env return = -101.39


 47%|█████████████████▍                   | 3771/8000 [56:28<1:01:02,  1.15it/s]

Episode 3771/8000, real env return = -106.08


 47%|█████████████████▍                   | 3781/8000 [56:37<1:01:23,  1.15it/s]

Episode 3781/8000, real env return = -109.32


 47%|██████████████████▍                    | 3791/8000 [56:45<56:53,  1.23it/s]

Episode 3791/8000, real env return = -101.47


 48%|██████████████████▌                    | 3801/8000 [56:54<58:31,  1.20it/s]

Episode 3801/8000, real env return = -101.54


 48%|██████████████████▌                    | 3811/8000 [57:02<59:04,  1.18it/s]

Episode 3811/8000, real env return = -101.15


 48%|██████████████████▋                    | 3821/8000 [57:11<58:47,  1.18it/s]

Episode 3821/8000, real env return = -101.48


 48%|█████████████████▋                   | 3831/8000 [57:20<1:00:36,  1.15it/s]

Episode 3831/8000, real env return = -101.33


 48%|█████████████████▊                   | 3841/8000 [57:28<1:00:32,  1.14it/s]

Episode 3841/8000, real env return = -101.42


 48%|█████████████████▊                   | 3851/8000 [57:37<1:01:44,  1.12it/s]

Episode 3851/8000, real env return = -101.55


 48%|██████████████████▊                    | 3861/8000 [57:46<58:50,  1.17it/s]

Episode 3861/8000, real env return = -107.74


 48%|█████████████████▉                   | 3871/8000 [57:55<1:00:49,  1.13it/s]

Episode 3871/8000, real env return = -107.70


 49%|█████████████████▉                   | 3881/8000 [58:03<1:00:00,  1.14it/s]

Episode 3881/8000, real env return = -106.98


 49%|█████████████████▉                   | 3891/8000 [58:12<1:00:29,  1.13it/s]

Episode 3891/8000, real env return = -100.76


 49%|██████████████████                   | 3901/8000 [58:21<1:00:36,  1.13it/s]

Episode 3901/8000, real env return = -100.94


 49%|██████████████████                   | 3911/8000 [58:30<1:06:16,  1.03it/s]

Episode 3911/8000, real env return = -100.95


 49%|██████████████████▏                  | 3921/8000 [58:39<1:02:46,  1.08it/s]

Episode 3921/8000, real env return = -101.80


 49%|███████████████████▏                   | 3931/8000 [58:48<57:44,  1.17it/s]

Episode 3931/8000, real env return = -101.97


 49%|███████████████████▏                   | 3941/8000 [58:56<58:55,  1.15it/s]

Episode 3941/8000, real env return = -102.25


 49%|███████████████████▎                   | 3951/8000 [59:05<57:06,  1.18it/s]

Episode 3951/8000, real env return = -101.78


 50%|███████████████████▎                   | 3961/8000 [59:13<56:58,  1.18it/s]

Episode 3961/8000, real env return = -102.41


 50%|███████████████████▎                   | 3971/8000 [59:22<59:14,  1.13it/s]

Episode 3971/8000, real env return = -102.10


 50%|███████████████████▍                   | 3981/8000 [59:31<56:18,  1.19it/s]

Episode 3981/8000, real env return = -104.99


 50%|██████████████████▍                  | 3991/8000 [59:40<1:04:55,  1.03it/s]

Episode 3991/8000, real env return = -101.03


 50%|███████████████████▌                   | 4001/8000 [59:48<55:36,  1.20it/s]

Episode 4001/8000, real env return = -102.06


 50%|███████████████████▌                   | 4011/8000 [59:56<55:12,  1.20it/s]

Episode 4011/8000, real env return = -102.12


 50%|██████████████████▌                  | 4021/8000 [1:00:05<56:20,  1.18it/s]

Episode 4021/8000, real env return = -101.86


 50%|██████████████████▋                  | 4031/8000 [1:00:14<55:58,  1.18it/s]

Episode 4031/8000, real env return = -101.12


 51%|██████████████████▋                  | 4041/8000 [1:00:22<56:00,  1.18it/s]

Episode 4041/8000, real env return = -107.08


 51%|██████████████████▋                  | 4051/8000 [1:00:31<56:16,  1.17it/s]

Episode 4051/8000, real env return = -107.00


 51%|██████████████████▊                  | 4061/8000 [1:00:39<54:37,  1.20it/s]

Episode 4061/8000, real env return = -100.39


 51%|██████████████████▊                  | 4071/8000 [1:00:48<55:34,  1.18it/s]

Episode 4071/8000, real env return = -101.99


 51%|██████████████████▊                  | 4081/8000 [1:00:56<55:32,  1.18it/s]

Episode 4081/8000, real env return = -102.15


 51%|██████████████████▉                  | 4091/8000 [1:01:05<54:25,  1.20it/s]

Episode 4091/8000, real env return = -102.19


 51%|██████████████████▉                  | 4101/8000 [1:01:13<56:07,  1.16it/s]

Episode 4101/8000, real env return = -107.67


 51%|███████████████████                  | 4111/8000 [1:01:22<56:03,  1.16it/s]

Episode 4111/8000, real env return = -107.73


 52%|███████████████████                  | 4121/8000 [1:01:31<56:50,  1.14it/s]

Episode 4121/8000, real env return = -108.71


 52%|███████████████████                  | 4131/8000 [1:01:40<57:20,  1.12it/s]

Episode 4131/8000, real env return = -101.52


 52%|███████████████████▏                 | 4141/8000 [1:01:49<59:01,  1.09it/s]

Episode 4141/8000, real env return = -101.85


 52%|██████████████████▏                | 4151/8000 [1:01:58<1:00:46,  1.06it/s]

Episode 4151/8000, real env return = -102.38


 52%|███████████████████▏                 | 4161/8000 [1:02:07<57:37,  1.11it/s]

Episode 4161/8000, real env return = -101.90


 52%|███████████████████▎                 | 4171/8000 [1:02:16<55:37,  1.15it/s]

Episode 4171/8000, real env return = -101.77


 52%|███████████████████▎                 | 4181/8000 [1:02:25<56:05,  1.13it/s]

Episode 4181/8000, real env return = -101.20


 52%|███████████████████▍                 | 4191/8000 [1:02:34<57:03,  1.11it/s]

Episode 4191/8000, real env return = -102.06


 53%|███████████████████▍                 | 4201/8000 [1:02:42<54:01,  1.17it/s]

Episode 4201/8000, real env return = -101.44


 53%|███████████████████▍                 | 4211/8000 [1:02:51<53:11,  1.19it/s]

Episode 4211/8000, real env return = -101.94


 53%|███████████████████▌                 | 4221/8000 [1:02:59<52:59,  1.19it/s]

Episode 4221/8000, real env return = -101.97


 53%|███████████████████▌                 | 4231/8000 [1:03:08<53:04,  1.18it/s]

Episode 4231/8000, real env return = -102.08


 53%|███████████████████▌                 | 4241/8000 [1:03:16<52:45,  1.19it/s]

Episode 4241/8000, real env return = -111.85


 53%|███████████████████▋                 | 4251/8000 [1:03:26<56:22,  1.11it/s]

Episode 4251/8000, real env return = -106.38


 53%|███████████████████▋                 | 4261/8000 [1:03:35<54:53,  1.14it/s]

Episode 4261/8000, real env return = -101.44


 53%|███████████████████▊                 | 4271/8000 [1:03:43<52:25,  1.19it/s]

Episode 4271/8000, real env return = -102.14


 54%|███████████████████▊                 | 4281/8000 [1:03:52<54:24,  1.14it/s]

Episode 4281/8000, real env return = -100.93


 54%|███████████████████▊                 | 4291/8000 [1:04:00<51:39,  1.20it/s]

Episode 4291/8000, real env return = -109.12


 54%|███████████████████▉                 | 4301/8000 [1:04:09<52:12,  1.18it/s]

Episode 4301/8000, real env return = -106.10


 54%|███████████████████▉                 | 4311/8000 [1:04:17<53:11,  1.16it/s]

Episode 4311/8000, real env return = -101.43


 54%|███████████████████▉                 | 4321/8000 [1:04:26<53:16,  1.15it/s]

Episode 4321/8000, real env return = -102.76


 54%|████████████████████                 | 4331/8000 [1:04:34<53:26,  1.14it/s]

Episode 4331/8000, real env return = -101.72


 54%|████████████████████                 | 4341/8000 [1:04:43<51:07,  1.19it/s]

Episode 4341/8000, real env return = -102.07


 54%|████████████████████                 | 4351/8000 [1:04:52<53:36,  1.13it/s]

Episode 4351/8000, real env return = -101.94


 55%|████████████████████▏                | 4361/8000 [1:05:00<54:11,  1.12it/s]

Episode 4361/8000, real env return = -101.13


 55%|████████████████████▏                | 4371/8000 [1:05:09<52:41,  1.15it/s]

Episode 4371/8000, real env return = -101.51


 55%|████████████████████▎                | 4381/8000 [1:05:18<52:25,  1.15it/s]

Episode 4381/8000, real env return = -102.28


 55%|████████████████████▎                | 4391/8000 [1:05:26<52:30,  1.15it/s]

Episode 4391/8000, real env return = -101.94


 55%|████████████████████▎                | 4401/8000 [1:05:35<52:11,  1.15it/s]

Episode 4401/8000, real env return = -102.50


 55%|████████████████████▍                | 4411/8000 [1:05:44<53:15,  1.12it/s]

Episode 4411/8000, real env return = -101.11


 55%|████████████████████▍                | 4421/8000 [1:05:53<51:42,  1.15it/s]

Episode 4421/8000, real env return = -102.17


 55%|████████████████████▍                | 4431/8000 [1:06:01<52:14,  1.14it/s]

Episode 4431/8000, real env return = -101.16


 56%|████████████████████▌                | 4441/8000 [1:06:10<52:48,  1.12it/s]

Episode 4441/8000, real env return = -102.64


 56%|████████████████████▌                | 4451/8000 [1:06:19<50:40,  1.17it/s]

Episode 4451/8000, real env return = -102.40


 56%|████████████████████▋                | 4461/8000 [1:06:27<49:53,  1.18it/s]

Episode 4461/8000, real env return = -101.99


 56%|████████████████████▋                | 4471/8000 [1:06:36<50:30,  1.16it/s]

Episode 4471/8000, real env return = -102.52


 56%|████████████████████▋                | 4481/8000 [1:06:45<50:55,  1.15it/s]

Episode 4481/8000, real env return = -102.47


 56%|████████████████████▊                | 4491/8000 [1:06:53<51:20,  1.14it/s]

Episode 4491/8000, real env return = -102.78


 56%|████████████████████▊                | 4501/8000 [1:07:02<50:04,  1.16it/s]

Episode 4501/8000, real env return = -106.99


 56%|████████████████████▊                | 4511/8000 [1:07:10<49:05,  1.18it/s]

Episode 4511/8000, real env return = -102.36


 57%|████████████████████▉                | 4521/8000 [1:07:19<48:38,  1.19it/s]

Episode 4521/8000, real env return = -102.99


 57%|████████████████████▉                | 4531/8000 [1:07:28<51:58,  1.11it/s]

Episode 4531/8000, real env return = -103.74


 57%|█████████████████████                | 4541/8000 [1:07:36<49:24,  1.17it/s]

Episode 4541/8000, real env return = -102.86


 57%|█████████████████████                | 4551/8000 [1:07:45<47:45,  1.20it/s]

Episode 4551/8000, real env return = -101.77


 57%|█████████████████████                | 4561/8000 [1:07:53<49:51,  1.15it/s]

Episode 4561/8000, real env return = -101.26


 57%|█████████████████████▏               | 4571/8000 [1:08:02<48:51,  1.17it/s]

Episode 4571/8000, real env return = -100.30


 57%|█████████████████████▏               | 4581/8000 [1:08:11<50:52,  1.12it/s]

Episode 4581/8000, real env return = -100.99


 57%|█████████████████████▏               | 4591/8000 [1:08:20<54:07,  1.05it/s]

Episode 4591/8000, real env return = -101.51


 58%|█████████████████████▎               | 4601/8000 [1:08:28<48:16,  1.17it/s]

Episode 4601/8000, real env return = -101.37


 58%|█████████████████████▎               | 4611/8000 [1:08:37<50:12,  1.13it/s]

Episode 4611/8000, real env return = -101.35


 58%|█████████████████████▎               | 4621/8000 [1:08:46<48:40,  1.16it/s]

Episode 4621/8000, real env return = -101.86


 58%|█████████████████████▍               | 4631/8000 [1:08:55<51:06,  1.10it/s]

Episode 4631/8000, real env return = -100.63


 58%|█████████████████████▍               | 4641/8000 [1:09:04<49:00,  1.14it/s]

Episode 4641/8000, real env return = -100.02


 58%|█████████████████████▌               | 4651/8000 [1:09:13<50:59,  1.09it/s]

Episode 4651/8000, real env return = -100.30


 58%|█████████████████████▌               | 4661/8000 [1:09:22<50:12,  1.11it/s]

Episode 4661/8000, real env return = -99.39


 58%|█████████████████████▌               | 4671/8000 [1:09:31<49:29,  1.12it/s]

Episode 4671/8000, real env return = -101.00


 59%|█████████████████████▋               | 4681/8000 [1:09:39<48:53,  1.13it/s]

Episode 4681/8000, real env return = -100.61


 59%|█████████████████████▋               | 4691/8000 [1:09:48<47:29,  1.16it/s]

Episode 4691/8000, real env return = -103.71


 59%|█████████████████████▋               | 4701/8000 [1:09:57<48:25,  1.14it/s]

Episode 4701/8000, real env return = -100.90


 59%|█████████████████████▊               | 4711/8000 [1:10:06<48:17,  1.14it/s]

Episode 4711/8000, real env return = -102.07


 59%|█████████████████████▊               | 4721/8000 [1:10:14<47:18,  1.16it/s]

Episode 4721/8000, real env return = -103.26


 59%|█████████████████████▉               | 4731/8000 [1:10:23<48:19,  1.13it/s]

Episode 4731/8000, real env return = -100.80


 59%|█████████████████████▉               | 4741/8000 [1:10:31<45:57,  1.18it/s]

Episode 4741/8000, real env return = -102.98


 59%|█████████████████████▉               | 4751/8000 [1:10:40<46:07,  1.17it/s]

Episode 4751/8000, real env return = -103.59


 60%|██████████████████████               | 4761/8000 [1:10:49<46:03,  1.17it/s]

Episode 4761/8000, real env return = -102.73


 60%|██████████████████████               | 4771/8000 [1:10:57<45:37,  1.18it/s]

Episode 4771/8000, real env return = -101.11


 60%|██████████████████████               | 4781/8000 [1:11:06<46:48,  1.15it/s]

Episode 4781/8000, real env return = -102.30


 60%|██████████████████████▏              | 4791/8000 [1:11:14<45:35,  1.17it/s]

Episode 4791/8000, real env return = -101.92


 60%|██████████████████████▏              | 4801/8000 [1:11:23<47:42,  1.12it/s]

Episode 4801/8000, real env return = -102.16


 60%|██████████████████████▎              | 4811/8000 [1:11:31<44:37,  1.19it/s]

Episode 4811/8000, real env return = -108.84


 60%|██████████████████████▎              | 4821/8000 [1:11:40<44:06,  1.20it/s]

Episode 4821/8000, real env return = -101.19


 60%|██████████████████████▎              | 4831/8000 [1:11:48<46:13,  1.14it/s]

Episode 4831/8000, real env return = -101.81


 61%|██████████████████████▍              | 4841/8000 [1:11:57<42:54,  1.23it/s]

Episode 4841/8000, real env return = -100.63


 61%|██████████████████████▍              | 4851/8000 [1:12:05<44:10,  1.19it/s]

Episode 4851/8000, real env return = -100.64


 61%|██████████████████████▍              | 4861/8000 [1:12:14<45:14,  1.16it/s]

Episode 4861/8000, real env return = -101.83


 61%|██████████████████████▌              | 4871/8000 [1:12:22<44:35,  1.17it/s]

Episode 4871/8000, real env return = -102.91


 61%|██████████████████████▌              | 4881/8000 [1:12:31<43:47,  1.19it/s]

Episode 4881/8000, real env return = -102.17


 61%|██████████████████████▌              | 4891/8000 [1:12:40<45:50,  1.13it/s]

Episode 4891/8000, real env return = -100.99


 61%|██████████████████████▋              | 4901/8000 [1:12:49<45:06,  1.15it/s]

Episode 4901/8000, real env return = -104.08


 61%|██████████████████████▋              | 4911/8000 [1:12:57<46:00,  1.12it/s]

Episode 4911/8000, real env return = -103.55


 62%|██████████████████████▊              | 4921/8000 [1:13:06<44:33,  1.15it/s]

Episode 4921/8000, real env return = -103.05


 62%|██████████████████████▊              | 4931/8000 [1:13:15<45:04,  1.13it/s]

Episode 4931/8000, real env return = -102.88


 62%|██████████████████████▊              | 4941/8000 [1:13:24<45:55,  1.11it/s]

Episode 4941/8000, real env return = -101.67


 62%|██████████████████████▉              | 4951/8000 [1:13:33<46:56,  1.08it/s]

Episode 4951/8000, real env return = -103.68


 62%|██████████████████████▉              | 4961/8000 [1:13:42<43:23,  1.17it/s]

Episode 4961/8000, real env return = -105.05


 62%|██████████████████████▉              | 4971/8000 [1:13:50<44:47,  1.13it/s]

Episode 4971/8000, real env return = -103.63


 62%|███████████████████████              | 4981/8000 [1:13:59<45:16,  1.11it/s]

Episode 4981/8000, real env return = -101.53


 62%|███████████████████████              | 4991/8000 [1:14:08<42:42,  1.17it/s]

Episode 4991/8000, real env return = -102.11


 63%|███████████████████████▏             | 5001/8000 [1:14:16<43:45,  1.14it/s]

Episode 5001/8000, real env return = -109.86


 63%|███████████████████████▏             | 5011/8000 [1:14:25<41:16,  1.21it/s]

Episode 5011/8000, real env return = -108.96


 63%|███████████████████████▏             | 5021/8000 [1:14:33<41:53,  1.19it/s]

Episode 5021/8000, real env return = -109.67


 63%|███████████████████████▎             | 5031/8000 [1:14:42<41:46,  1.18it/s]

Episode 5031/8000, real env return = -109.93


 63%|███████████████████████▎             | 5041/8000 [1:14:50<41:28,  1.19it/s]

Episode 5041/8000, real env return = -108.51


 63%|███████████████████████▎             | 5051/8000 [1:14:59<41:30,  1.18it/s]

Episode 5051/8000, real env return = -104.54


 63%|███████████████████████▍             | 5061/8000 [1:15:07<40:58,  1.20it/s]

Episode 5061/8000, real env return = -102.51


 63%|███████████████████████▍             | 5071/8000 [1:15:16<41:36,  1.17it/s]

Episode 5071/8000, real env return = -104.42


 64%|███████████████████████▍             | 5081/8000 [1:15:25<41:00,  1.19it/s]

Episode 5081/8000, real env return = -104.42


 64%|███████████████████████▌             | 5091/8000 [1:15:33<41:34,  1.17it/s]

Episode 5091/8000, real env return = -104.01


 64%|███████████████████████▌             | 5101/8000 [1:15:42<40:32,  1.19it/s]

Episode 5101/8000, real env return = -102.43


 64%|███████████████████████▋             | 5111/8000 [1:15:50<40:14,  1.20it/s]

Episode 5111/8000, real env return = -105.01


 64%|███████████████████████▋             | 5121/8000 [1:15:59<41:24,  1.16it/s]

Episode 5121/8000, real env return = -102.04


 64%|███████████████████████▋             | 5131/8000 [1:16:07<41:15,  1.16it/s]

Episode 5131/8000, real env return = -103.77


 64%|███████████████████████▊             | 5141/8000 [1:16:16<42:11,  1.13it/s]

Episode 5141/8000, real env return = -105.16


 64%|███████████████████████▊             | 5151/8000 [1:16:25<41:14,  1.15it/s]

Episode 5151/8000, real env return = -104.65


 65%|███████████████████████▊             | 5161/8000 [1:16:33<40:27,  1.17it/s]

Episode 5161/8000, real env return = -104.72


 65%|███████████████████████▉             | 5171/8000 [1:16:42<42:03,  1.12it/s]

Episode 5171/8000, real env return = -104.30


 65%|███████████████████████▉             | 5181/8000 [1:16:51<40:39,  1.16it/s]

Episode 5181/8000, real env return = -105.52


 65%|████████████████████████             | 5191/8000 [1:17:00<39:19,  1.19it/s]

Episode 5191/8000, real env return = -103.67


 65%|████████████████████████             | 5201/8000 [1:17:09<41:41,  1.12it/s]

Episode 5201/8000, real env return = -105.27


 65%|████████████████████████             | 5211/8000 [1:17:17<41:11,  1.13it/s]

Episode 5211/8000, real env return = -105.82


 65%|████████████████████████▏            | 5221/8000 [1:17:26<41:05,  1.13it/s]

Episode 5221/8000, real env return = -105.59


 65%|████████████████████████▏            | 5231/8000 [1:17:35<40:54,  1.13it/s]

Episode 5231/8000, real env return = -105.76


 66%|████████████████████████▏            | 5241/8000 [1:17:43<39:21,  1.17it/s]

Episode 5241/8000, real env return = -105.33


 66%|████████████████████████▎            | 5251/8000 [1:17:52<39:51,  1.15it/s]

Episode 5251/8000, real env return = -105.58


 66%|████████████████████████▎            | 5261/8000 [1:18:01<40:07,  1.14it/s]

Episode 5261/8000, real env return = -105.05


 66%|████████████████████████▍            | 5271/8000 [1:18:09<38:52,  1.17it/s]

Episode 5271/8000, real env return = -105.43


 66%|████████████████████████▍            | 5281/8000 [1:18:18<38:21,  1.18it/s]

Episode 5281/8000, real env return = -105.12


 66%|████████████████████████▍            | 5291/8000 [1:18:26<37:39,  1.20it/s]

Episode 5291/8000, real env return = -104.78


 66%|████████████████████████▌            | 5301/8000 [1:18:35<37:57,  1.18it/s]

Episode 5301/8000, real env return = -103.90


 66%|████████████████████████▌            | 5311/8000 [1:18:44<37:59,  1.18it/s]

Episode 5311/8000, real env return = -105.45


 67%|████████████████████████▌            | 5321/8000 [1:18:52<37:38,  1.19it/s]

Episode 5321/8000, real env return = -105.12


 67%|████████████████████████▋            | 5331/8000 [1:19:01<37:00,  1.20it/s]

Episode 5331/8000, real env return = -104.97


 67%|████████████████████████▋            | 5341/8000 [1:19:09<37:58,  1.17it/s]

Episode 5341/8000, real env return = -120.83


 67%|████████████████████████▋            | 5351/8000 [1:19:18<38:15,  1.15it/s]

Episode 5351/8000, real env return = -101.14


 67%|████████████████████████▊            | 5361/8000 [1:19:27<39:05,  1.13it/s]

Episode 5361/8000, real env return = -100.29


 67%|████████████████████████▊            | 5371/8000 [1:19:35<37:27,  1.17it/s]

Episode 5371/8000, real env return = -100.19


 67%|████████████████████████▉            | 5381/8000 [1:19:44<38:56,  1.12it/s]

Episode 5381/8000, real env return = -99.36


 67%|████████████████████████▉            | 5391/8000 [1:19:53<39:10,  1.11it/s]

Episode 5391/8000, real env return = -100.10


 68%|████████████████████████▉            | 5401/8000 [1:20:02<38:54,  1.11it/s]

Episode 5401/8000, real env return = -100.44


 68%|█████████████████████████            | 5411/8000 [1:20:11<37:28,  1.15it/s]

Episode 5411/8000, real env return = -100.91


 68%|█████████████████████████            | 5421/8000 [1:20:19<37:46,  1.14it/s]

Episode 5421/8000, real env return = -103.70


 68%|█████████████████████████            | 5431/8000 [1:20:28<37:30,  1.14it/s]

Episode 5431/8000, real env return = -100.53


 68%|█████████████████████████▏           | 5441/8000 [1:20:37<37:59,  1.12it/s]

Episode 5441/8000, real env return = -100.09


 68%|█████████████████████████▏           | 5451/8000 [1:20:46<37:56,  1.12it/s]

Episode 5451/8000, real env return = -100.74


 68%|█████████████████████████▎           | 5461/8000 [1:20:55<37:03,  1.14it/s]

Episode 5461/8000, real env return = -100.20


 68%|█████████████████████████▎           | 5471/8000 [1:21:04<36:46,  1.15it/s]

Episode 5471/8000, real env return = -101.25


 69%|█████████████████████████▎           | 5481/8000 [1:21:12<36:53,  1.14it/s]

Episode 5481/8000, real env return = -102.14


 69%|█████████████████████████▍           | 5491/8000 [1:21:21<37:02,  1.13it/s]

Episode 5491/8000, real env return = -100.07


 69%|█████████████████████████▍           | 5501/8000 [1:21:30<37:10,  1.12it/s]

Episode 5501/8000, real env return = -99.75


 69%|█████████████████████████▍           | 5511/8000 [1:21:39<36:35,  1.13it/s]

Episode 5511/8000, real env return = -102.15


 69%|█████████████████████████▌           | 5521/8000 [1:21:47<34:21,  1.20it/s]

Episode 5521/8000, real env return = -100.46


 69%|█████████████████████████▌           | 5531/8000 [1:21:55<34:30,  1.19it/s]

Episode 5531/8000, real env return = -99.22


 69%|█████████████████████████▋           | 5541/8000 [1:22:04<34:30,  1.19it/s]

Episode 5541/8000, real env return = -102.17


 69%|█████████████████████████▋           | 5551/8000 [1:22:13<35:56,  1.14it/s]

Episode 5551/8000, real env return = -100.57


 70%|█████████████████████████▋           | 5561/8000 [1:22:21<33:55,  1.20it/s]

Episode 5561/8000, real env return = -100.32


 70%|█████████████████████████▊           | 5571/8000 [1:22:30<36:31,  1.11it/s]

Episode 5571/8000, real env return = -100.86


 70%|█████████████████████████▊           | 5581/8000 [1:22:39<34:05,  1.18it/s]

Episode 5581/8000, real env return = -100.57


 70%|█████████████████████████▊           | 5591/8000 [1:22:47<33:46,  1.19it/s]

Episode 5591/8000, real env return = -100.53


 70%|█████████████████████████▉           | 5601/8000 [1:22:56<34:37,  1.15it/s]

Episode 5601/8000, real env return = -100.90


 70%|█████████████████████████▉           | 5611/8000 [1:23:05<35:18,  1.13it/s]

Episode 5611/8000, real env return = -104.00


 70%|█████████████████████████▉           | 5621/8000 [1:23:13<34:05,  1.16it/s]

Episode 5621/8000, real env return = -104.95


 70%|██████████████████████████           | 5631/8000 [1:23:22<34:20,  1.15it/s]

Episode 5631/8000, real env return = -105.25


 71%|██████████████████████████           | 5641/8000 [1:23:31<35:02,  1.12it/s]

Episode 5641/8000, real env return = -103.50


 71%|██████████████████████████▏          | 5651/8000 [1:23:40<34:00,  1.15it/s]

Episode 5651/8000, real env return = -104.52


 71%|██████████████████████████▏          | 5661/8000 [1:23:48<33:58,  1.15it/s]

Episode 5661/8000, real env return = -105.55


 71%|██████████████████████████▏          | 5671/8000 [1:23:57<33:09,  1.17it/s]

Episode 5671/8000, real env return = -104.97


 71%|██████████████████████████▎          | 5681/8000 [1:24:06<33:29,  1.15it/s]

Episode 5681/8000, real env return = -103.73


 71%|██████████████████████████▎          | 5691/8000 [1:24:15<33:45,  1.14it/s]

Episode 5691/8000, real env return = -104.27


 71%|██████████████████████████▎          | 5701/8000 [1:24:23<33:52,  1.13it/s]

Episode 5701/8000, real env return = -104.64


 71%|██████████████████████████▍          | 5711/8000 [1:24:32<33:43,  1.13it/s]

Episode 5711/8000, real env return = -105.54


 72%|██████████████████████████▍          | 5721/8000 [1:24:41<33:51,  1.12it/s]

Episode 5721/8000, real env return = -105.18


 72%|██████████████████████████▌          | 5731/8000 [1:24:50<32:43,  1.16it/s]

Episode 5731/8000, real env return = -105.36


 72%|██████████████████████████▌          | 5741/8000 [1:24:59<33:53,  1.11it/s]

Episode 5741/8000, real env return = -105.03


 72%|██████████████████████████▌          | 5751/8000 [1:25:08<33:34,  1.12it/s]

Episode 5751/8000, real env return = -105.58


 72%|██████████████████████████▋          | 5761/8000 [1:25:17<33:05,  1.13it/s]

Episode 5761/8000, real env return = -105.48


 72%|██████████████████████████▋          | 5771/8000 [1:25:26<32:37,  1.14it/s]

Episode 5771/8000, real env return = -104.99


 72%|██████████████████████████▋          | 5781/8000 [1:25:35<32:20,  1.14it/s]

Episode 5781/8000, real env return = -102.60


 72%|██████████████████████████▊          | 5791/8000 [1:25:43<31:54,  1.15it/s]

Episode 5791/8000, real env return = -105.19


 73%|██████████████████████████▊          | 5801/8000 [1:25:52<30:37,  1.20it/s]

Episode 5801/8000, real env return = -105.37


 73%|██████████████████████████▉          | 5811/8000 [1:26:01<31:26,  1.16it/s]

Episode 5811/8000, real env return = -105.66


 73%|██████████████████████████▉          | 5821/8000 [1:26:09<32:00,  1.13it/s]

Episode 5821/8000, real env return = -104.86


 73%|██████████████████████████▉          | 5831/8000 [1:26:18<31:51,  1.14it/s]

Episode 5831/8000, real env return = -102.77


 73%|███████████████████████████          | 5841/8000 [1:26:27<31:32,  1.14it/s]

Episode 5841/8000, real env return = -100.86


 73%|███████████████████████████          | 5851/8000 [1:26:36<30:44,  1.17it/s]

Episode 5851/8000, real env return = -105.59


 73%|███████████████████████████          | 5861/8000 [1:26:44<31:03,  1.15it/s]

Episode 5861/8000, real env return = -106.15


 73%|███████████████████████████▏         | 5871/8000 [1:26:54<34:39,  1.02it/s]

Episode 5871/8000, real env return = -105.46


 74%|███████████████████████████▏         | 5881/8000 [1:27:02<30:08,  1.17it/s]

Episode 5881/8000, real env return = -104.84


 74%|███████████████████████████▏         | 5891/8000 [1:27:11<31:33,  1.11it/s]

Episode 5891/8000, real env return = -101.32


 74%|███████████████████████████▎         | 5901/8000 [1:27:20<30:24,  1.15it/s]

Episode 5901/8000, real env return = -105.74


 74%|███████████████████████████▎         | 5911/8000 [1:27:28<29:48,  1.17it/s]

Episode 5911/8000, real env return = -105.22


 74%|███████████████████████████▍         | 5921/8000 [1:27:37<30:33,  1.13it/s]

Episode 5921/8000, real env return = -100.98


 74%|███████████████████████████▍         | 5931/8000 [1:27:46<30:44,  1.12it/s]

Episode 5931/8000, real env return = -101.62


 74%|███████████████████████████▍         | 5941/8000 [1:27:54<28:43,  1.19it/s]

Episode 5941/8000, real env return = -100.93


 74%|███████████████████████████▌         | 5951/8000 [1:28:03<28:53,  1.18it/s]

Episode 5951/8000, real env return = -101.25


 75%|███████████████████████████▌         | 5961/8000 [1:28:11<28:09,  1.21it/s]

Episode 5961/8000, real env return = -103.98


 75%|███████████████████████████▌         | 5971/8000 [1:28:20<29:57,  1.13it/s]

Episode 5971/8000, real env return = -105.12


 75%|███████████████████████████▋         | 5981/8000 [1:28:29<29:54,  1.13it/s]

Episode 5981/8000, real env return = -105.27


 75%|███████████████████████████▋         | 5991/8000 [1:28:37<29:00,  1.15it/s]

Episode 5991/8000, real env return = -104.60


 75%|███████████████████████████▊         | 6001/8000 [1:28:46<28:49,  1.16it/s]

Episode 6001/8000, real env return = -104.54


 75%|███████████████████████████▊         | 6011/8000 [1:28:55<28:18,  1.17it/s]

Episode 6011/8000, real env return = -103.67


 75%|███████████████████████████▊         | 6021/8000 [1:29:03<28:18,  1.17it/s]

Episode 6021/8000, real env return = -105.11


 75%|███████████████████████████▉         | 6031/8000 [1:29:12<27:42,  1.18it/s]

Episode 6031/8000, real env return = -101.55


 76%|███████████████████████████▉         | 6041/8000 [1:29:21<28:15,  1.16it/s]

Episode 6041/8000, real env return = -101.99


 76%|███████████████████████████▉         | 6051/8000 [1:29:29<28:00,  1.16it/s]

Episode 6051/8000, real env return = -101.29


 76%|████████████████████████████         | 6061/8000 [1:29:38<28:44,  1.12it/s]

Episode 6061/8000, real env return = -101.62


 76%|████████████████████████████         | 6071/8000 [1:29:47<28:28,  1.13it/s]

Episode 6071/8000, real env return = -101.25


 76%|████████████████████████████         | 6081/8000 [1:29:55<28:44,  1.11it/s]

Episode 6081/8000, real env return = -101.58


 76%|████████████████████████████▏        | 6091/8000 [1:30:04<27:42,  1.15it/s]

Episode 6091/8000, real env return = -101.72


 76%|████████████████████████████▏        | 6101/8000 [1:30:13<27:16,  1.16it/s]

Episode 6101/8000, real env return = -106.67


 76%|████████████████████████████▎        | 6111/8000 [1:30:22<28:20,  1.11it/s]

Episode 6111/8000, real env return = -105.92


 77%|████████████████████████████▎        | 6121/8000 [1:30:31<27:47,  1.13it/s]

Episode 6121/8000, real env return = -106.07


 77%|████████████████████████████▎        | 6131/8000 [1:30:39<25:50,  1.21it/s]

Episode 6131/8000, real env return = -105.71


 77%|████████████████████████████▍        | 6141/8000 [1:30:48<26:18,  1.18it/s]

Episode 6141/8000, real env return = -106.40


 77%|████████████████████████████▍        | 6151/8000 [1:30:56<26:16,  1.17it/s]

Episode 6151/8000, real env return = -106.13


 77%|████████████████████████████▍        | 6161/8000 [1:31:05<27:13,  1.13it/s]

Episode 6161/8000, real env return = -105.82


 77%|████████████████████████████▌        | 6171/8000 [1:31:14<26:55,  1.13it/s]

Episode 6171/8000, real env return = -104.87


 77%|████████████████████████████▌        | 6181/8000 [1:31:23<26:53,  1.13it/s]

Episode 6181/8000, real env return = -107.19


 77%|████████████████████████████▋        | 6191/8000 [1:31:31<25:54,  1.16it/s]

Episode 6191/8000, real env return = -107.15


 78%|████████████████████████████▋        | 6201/8000 [1:31:40<25:33,  1.17it/s]

Episode 6201/8000, real env return = -106.94


 78%|████████████████████████████▋        | 6211/8000 [1:31:49<26:21,  1.13it/s]

Episode 6211/8000, real env return = -106.43


 78%|████████████████████████████▊        | 6221/8000 [1:31:57<26:13,  1.13it/s]

Episode 6221/8000, real env return = -100.95


 78%|████████████████████████████▊        | 6231/8000 [1:32:06<25:12,  1.17it/s]

Episode 6231/8000, real env return = -101.05


 78%|████████████████████████████▊        | 6241/8000 [1:32:15<24:53,  1.18it/s]

Episode 6241/8000, real env return = -101.36


 78%|████████████████████████████▉        | 6251/8000 [1:32:24<25:39,  1.14it/s]

Episode 6251/8000, real env return = -102.40


 78%|████████████████████████████▉        | 6261/8000 [1:32:32<25:48,  1.12it/s]

Episode 6261/8000, real env return = -104.44


 78%|█████████████████████████████        | 6271/8000 [1:32:41<24:34,  1.17it/s]

Episode 6271/8000, real env return = -104.42


 79%|█████████████████████████████        | 6281/8000 [1:32:50<25:19,  1.13it/s]

Episode 6281/8000, real env return = -117.51


 79%|█████████████████████████████        | 6291/8000 [1:32:59<24:10,  1.18it/s]

Episode 6291/8000, real env return = -101.99


 79%|█████████████████████████████▏       | 6301/8000 [1:33:08<25:15,  1.12it/s]

Episode 6301/8000, real env return = -101.37


 79%|█████████████████████████████▏       | 6311/8000 [1:33:16<25:24,  1.11it/s]

Episode 6311/8000, real env return = -102.27


 79%|█████████████████████████████▏       | 6321/8000 [1:33:25<24:59,  1.12it/s]

Episode 6321/8000, real env return = -103.02


 79%|█████████████████████████████▎       | 6331/8000 [1:33:34<24:41,  1.13it/s]

Episode 6331/8000, real env return = -101.75


 79%|█████████████████████████████▎       | 6341/8000 [1:33:43<24:25,  1.13it/s]

Episode 6341/8000, real env return = -104.36


 79%|█████████████████████████████▎       | 6351/8000 [1:33:52<24:00,  1.14it/s]

Episode 6351/8000, real env return = -102.66


 80%|█████████████████████████████▍       | 6361/8000 [1:34:01<24:08,  1.13it/s]

Episode 6361/8000, real env return = -104.13


 80%|█████████████████████████████▍       | 6371/8000 [1:34:10<24:09,  1.12it/s]

Episode 6371/8000, real env return = -103.19


 80%|█████████████████████████████▌       | 6381/8000 [1:34:19<24:08,  1.12it/s]

Episode 6381/8000, real env return = -102.58


 80%|█████████████████████████████▌       | 6391/8000 [1:34:27<23:53,  1.12it/s]

Episode 6391/8000, real env return = -105.03


 80%|█████████████████████████████▌       | 6401/8000 [1:34:36<23:52,  1.12it/s]

Episode 6401/8000, real env return = -101.46


 80%|█████████████████████████████▋       | 6411/8000 [1:34:45<23:04,  1.15it/s]

Episode 6411/8000, real env return = -102.02


 80%|█████████████████████████████▋       | 6421/8000 [1:34:54<23:13,  1.13it/s]

Episode 6421/8000, real env return = -103.61


 80%|█████████████████████████████▋       | 6431/8000 [1:35:03<23:31,  1.11it/s]

Episode 6431/8000, real env return = -102.73


 81%|█████████████████████████████▊       | 6441/8000 [1:35:12<22:50,  1.14it/s]

Episode 6441/8000, real env return = -102.91


 81%|█████████████████████████████▊       | 6451/8000 [1:35:21<22:58,  1.12it/s]

Episode 6451/8000, real env return = -103.14


 81%|█████████████████████████████▉       | 6461/8000 [1:35:29<22:12,  1.15it/s]

Episode 6461/8000, real env return = -103.83


 81%|█████████████████████████████▉       | 6471/8000 [1:35:38<22:38,  1.13it/s]

Episode 6471/8000, real env return = -101.73


 81%|█████████████████████████████▉       | 6481/8000 [1:35:47<22:07,  1.14it/s]

Episode 6481/8000, real env return = -103.48


 81%|██████████████████████████████       | 6491/8000 [1:35:56<22:18,  1.13it/s]

Episode 6491/8000, real env return = -103.84


 81%|██████████████████████████████       | 6501/8000 [1:36:05<23:07,  1.08it/s]

Episode 6501/8000, real env return = -121.55


 81%|██████████████████████████████       | 6511/8000 [1:36:14<21:46,  1.14it/s]

Episode 6511/8000, real env return = -117.86


 82%|██████████████████████████████▏      | 6521/8000 [1:36:23<21:50,  1.13it/s]

Episode 6521/8000, real env return = -117.50


 82%|██████████████████████████████▏      | 6531/8000 [1:36:32<22:08,  1.11it/s]

Episode 6531/8000, real env return = -121.46


 82%|██████████████████████████████▎      | 6541/8000 [1:36:41<21:37,  1.12it/s]

Episode 6541/8000, real env return = -119.68


 82%|██████████████████████████████▎      | 6551/8000 [1:36:50<21:45,  1.11it/s]

Episode 6551/8000, real env return = -118.50


 82%|██████████████████████████████▎      | 6561/8000 [1:36:59<21:49,  1.10it/s]

Episode 6561/8000, real env return = -114.03


 82%|██████████████████████████████▍      | 6571/8000 [1:37:08<21:40,  1.10it/s]

Episode 6571/8000, real env return = -103.30


 82%|██████████████████████████████▍      | 6581/8000 [1:37:17<20:59,  1.13it/s]

Episode 6581/8000, real env return = -104.92


 82%|██████████████████████████████▍      | 6591/8000 [1:37:25<20:43,  1.13it/s]

Episode 6591/8000, real env return = -104.06


 83%|██████████████████████████████▌      | 6601/8000 [1:37:34<20:51,  1.12it/s]

Episode 6601/8000, real env return = -102.98


 83%|██████████████████████████████▌      | 6611/8000 [1:37:43<21:05,  1.10it/s]

Episode 6611/8000, real env return = -102.76


 83%|██████████████████████████████▌      | 6621/8000 [1:37:52<20:27,  1.12it/s]

Episode 6621/8000, real env return = -102.98


 83%|██████████████████████████████▋      | 6631/8000 [1:38:01<20:17,  1.12it/s]

Episode 6631/8000, real env return = -103.45


 83%|██████████████████████████████▋      | 6641/8000 [1:38:10<20:53,  1.08it/s]

Episode 6641/8000, real env return = -103.54


 83%|██████████████████████████████▊      | 6651/8000 [1:38:19<20:29,  1.10it/s]

Episode 6651/8000, real env return = -102.27


 83%|██████████████████████████████▊      | 6661/8000 [1:38:28<19:53,  1.12it/s]

Episode 6661/8000, real env return = -103.75


 83%|██████████████████████████████▊      | 6671/8000 [1:38:37<20:32,  1.08it/s]

Episode 6671/8000, real env return = -103.88


 84%|██████████████████████████████▉      | 6681/8000 [1:38:45<18:59,  1.16it/s]

Episode 6681/8000, real env return = -103.41


 84%|██████████████████████████████▉      | 6691/8000 [1:38:54<19:44,  1.11it/s]

Episode 6691/8000, real env return = -103.34


 84%|██████████████████████████████▉      | 6701/8000 [1:39:04<19:24,  1.12it/s]

Episode 6701/8000, real env return = -102.88


 84%|███████████████████████████████      | 6711/8000 [1:39:13<19:32,  1.10it/s]

Episode 6711/8000, real env return = -103.38


 84%|███████████████████████████████      | 6721/8000 [1:39:22<19:07,  1.11it/s]

Episode 6721/8000, real env return = -120.29


 84%|███████████████████████████████▏     | 6731/8000 [1:39:31<19:16,  1.10it/s]

Episode 6731/8000, real env return = -120.51


 84%|███████████████████████████████▏     | 6741/8000 [1:39:40<19:10,  1.09it/s]

Episode 6741/8000, real env return = -121.28


 84%|███████████████████████████████▏     | 6751/8000 [1:39:49<18:53,  1.10it/s]

Episode 6751/8000, real env return = -114.56


 85%|███████████████████████████████▎     | 6761/8000 [1:39:58<18:26,  1.12it/s]

Episode 6761/8000, real env return = -114.41


 85%|███████████████████████████████▎     | 6771/8000 [1:40:07<18:11,  1.13it/s]

Episode 6771/8000, real env return = -116.40


 85%|███████████████████████████████▎     | 6781/8000 [1:40:16<18:05,  1.12it/s]

Episode 6781/8000, real env return = -119.08


 85%|███████████████████████████████▍     | 6791/8000 [1:40:25<17:39,  1.14it/s]

Episode 6791/8000, real env return = -115.98


 85%|███████████████████████████████▍     | 6801/8000 [1:40:34<17:36,  1.13it/s]

Episode 6801/8000, real env return = -118.13


 85%|███████████████████████████████▌     | 6811/8000 [1:40:43<17:49,  1.11it/s]

Episode 6811/8000, real env return = -116.52


 85%|███████████████████████████████▌     | 6821/8000 [1:40:52<17:39,  1.11it/s]

Episode 6821/8000, real env return = -117.40


 85%|███████████████████████████████▌     | 6831/8000 [1:41:01<16:53,  1.15it/s]

Episode 6831/8000, real env return = -118.17


 86%|███████████████████████████████▋     | 6841/8000 [1:41:09<17:02,  1.13it/s]

Episode 6841/8000, real env return = -116.02


 86%|███████████████████████████████▋     | 6851/8000 [1:41:18<17:01,  1.12it/s]

Episode 6851/8000, real env return = -116.60


 86%|███████████████████████████████▋     | 6861/8000 [1:41:27<16:40,  1.14it/s]

Episode 6861/8000, real env return = -116.96


 86%|███████████████████████████████▊     | 6871/8000 [1:41:36<16:57,  1.11it/s]

Episode 6871/8000, real env return = -116.93


 86%|███████████████████████████████▊     | 6881/8000 [1:41:45<16:37,  1.12it/s]

Episode 6881/8000, real env return = -116.65


 86%|███████████████████████████████▊     | 6891/8000 [1:41:54<16:29,  1.12it/s]

Episode 6891/8000, real env return = -118.37


 86%|███████████████████████████████▉     | 6901/8000 [1:42:03<16:09,  1.13it/s]

Episode 6901/8000, real env return = -117.01


 86%|███████████████████████████████▉     | 6911/8000 [1:42:12<16:17,  1.11it/s]

Episode 6911/8000, real env return = -116.78


 87%|████████████████████████████████     | 6921/8000 [1:42:21<15:52,  1.13it/s]

Episode 6921/8000, real env return = -115.73


 87%|████████████████████████████████     | 6931/8000 [1:42:29<15:30,  1.15it/s]

Episode 6931/8000, real env return = -117.34


 87%|████████████████████████████████     | 6941/8000 [1:42:38<15:15,  1.16it/s]

Episode 6941/8000, real env return = -117.35


 87%|████████████████████████████████▏    | 6951/8000 [1:42:47<15:38,  1.12it/s]

Episode 6951/8000, real env return = -115.96


 87%|████████████████████████████████▏    | 6961/8000 [1:42:56<15:15,  1.13it/s]

Episode 6961/8000, real env return = -116.21


 87%|████████████████████████████████▏    | 6971/8000 [1:43:05<15:12,  1.13it/s]

Episode 6971/8000, real env return = -117.28


 87%|████████████████████████████████▎    | 6981/8000 [1:43:14<14:40,  1.16it/s]

Episode 6981/8000, real env return = -116.12


 87%|████████████████████████████████▎    | 6991/8000 [1:43:23<14:55,  1.13it/s]

Episode 6991/8000, real env return = -116.94


 88%|████████████████████████████████▍    | 7001/8000 [1:43:32<14:39,  1.14it/s]

Episode 7001/8000, real env return = -116.30


 88%|████████████████████████████████▍    | 7011/8000 [1:43:40<14:27,  1.14it/s]

Episode 7011/8000, real env return = -116.37


 88%|████████████████████████████████▍    | 7021/8000 [1:43:49<14:47,  1.10it/s]

Episode 7021/8000, real env return = -117.83


 88%|████████████████████████████████▌    | 7031/8000 [1:43:58<14:35,  1.11it/s]

Episode 7031/8000, real env return = -123.46


 88%|████████████████████████████████▌    | 7041/8000 [1:44:08<14:57,  1.07it/s]

Episode 7041/8000, real env return = -116.71


 88%|████████████████████████████████▌    | 7051/8000 [1:44:17<14:17,  1.11it/s]

Episode 7051/8000, real env return = -116.71


 88%|████████████████████████████████▋    | 7061/8000 [1:44:25<13:45,  1.14it/s]

Episode 7061/8000, real env return = -119.42


 88%|████████████████████████████████▋    | 7071/8000 [1:44:34<13:46,  1.12it/s]

Episode 7071/8000, real env return = -115.43


 89%|████████████████████████████████▋    | 7081/8000 [1:44:43<13:31,  1.13it/s]

Episode 7081/8000, real env return = -115.78


 89%|████████████████████████████████▊    | 7091/8000 [1:44:52<13:32,  1.12it/s]

Episode 7091/8000, real env return = -115.44


 89%|████████████████████████████████▊    | 7101/8000 [1:45:01<13:22,  1.12it/s]

Episode 7101/8000, real env return = -116.58


 89%|████████████████████████████████▉    | 7111/8000 [1:45:10<13:19,  1.11it/s]

Episode 7111/8000, real env return = -116.83


 89%|████████████████████████████████▉    | 7121/8000 [1:45:19<13:17,  1.10it/s]

Episode 7121/8000, real env return = -116.07


 89%|████████████████████████████████▉    | 7131/8000 [1:45:28<12:48,  1.13it/s]

Episode 7131/8000, real env return = -115.06


 89%|█████████████████████████████████    | 7141/8000 [1:45:37<13:02,  1.10it/s]

Episode 7141/8000, real env return = -119.02


 89%|█████████████████████████████████    | 7151/8000 [1:45:46<12:32,  1.13it/s]

Episode 7151/8000, real env return = -116.78


 90%|█████████████████████████████████    | 7161/8000 [1:45:55<12:09,  1.15it/s]

Episode 7161/8000, real env return = -116.47


 90%|█████████████████████████████████▏   | 7171/8000 [1:46:04<12:19,  1.12it/s]

Episode 7171/8000, real env return = -116.62


 90%|█████████████████████████████████▏   | 7181/8000 [1:46:13<12:24,  1.10it/s]

Episode 7181/8000, real env return = -116.70


 90%|█████████████████████████████████▎   | 7191/8000 [1:46:22<12:30,  1.08it/s]

Episode 7191/8000, real env return = -106.93


 90%|█████████████████████████████████▎   | 7201/8000 [1:46:32<12:28,  1.07it/s]

Episode 7201/8000, real env return = -102.42


 90%|█████████████████████████████████▎   | 7211/8000 [1:46:41<11:47,  1.12it/s]

Episode 7211/8000, real env return = -102.54


 90%|█████████████████████████████████▍   | 7221/8000 [1:46:50<11:36,  1.12it/s]

Episode 7221/8000, real env return = -101.95


 90%|█████████████████████████████████▍   | 7231/8000 [1:46:59<11:21,  1.13it/s]

Episode 7231/8000, real env return = -102.60


 91%|█████████████████████████████████▍   | 7241/8000 [1:47:07<11:07,  1.14it/s]

Episode 7241/8000, real env return = -102.38


 91%|█████████████████████████████████▌   | 7251/8000 [1:47:16<11:15,  1.11it/s]

Episode 7251/8000, real env return = -102.88


 91%|█████████████████████████████████▌   | 7261/8000 [1:47:25<11:06,  1.11it/s]

Episode 7261/8000, real env return = -104.10


 91%|█████████████████████████████████▋   | 7271/8000 [1:47:34<11:11,  1.09it/s]

Episode 7271/8000, real env return = -101.87


 91%|█████████████████████████████████▋   | 7281/8000 [1:47:43<10:48,  1.11it/s]

Episode 7281/8000, real env return = -103.06


 91%|█████████████████████████████████▋   | 7291/8000 [1:47:52<10:36,  1.11it/s]

Episode 7291/8000, real env return = -103.08


 91%|█████████████████████████████████▊   | 7301/8000 [1:48:01<10:20,  1.13it/s]

Episode 7301/8000, real env return = -103.47


 91%|█████████████████████████████████▊   | 7311/8000 [1:48:10<10:19,  1.11it/s]

Episode 7311/8000, real env return = -104.08


 92%|█████████████████████████████████▊   | 7321/8000 [1:48:19<10:05,  1.12it/s]

Episode 7321/8000, real env return = -103.16


 92%|█████████████████████████████████▉   | 7331/8000 [1:48:28<10:01,  1.11it/s]

Episode 7331/8000, real env return = -103.59


 92%|█████████████████████████████████▉   | 7341/8000 [1:48:37<09:45,  1.13it/s]

Episode 7341/8000, real env return = -102.14


 92%|█████████████████████████████████▉   | 7351/8000 [1:48:46<09:46,  1.11it/s]

Episode 7351/8000, real env return = -104.64


 92%|██████████████████████████████████   | 7361/8000 [1:48:55<09:25,  1.13it/s]

Episode 7361/8000, real env return = -103.93


 92%|██████████████████████████████████   | 7371/8000 [1:49:04<09:17,  1.13it/s]

Episode 7371/8000, real env return = -104.11


 92%|██████████████████████████████████▏  | 7381/8000 [1:49:13<09:19,  1.11it/s]

Episode 7381/8000, real env return = -115.88


 92%|██████████████████████████████████▏  | 7391/8000 [1:49:21<09:01,  1.12it/s]

Episode 7391/8000, real env return = -116.59


 93%|██████████████████████████████████▏  | 7401/8000 [1:49:30<08:55,  1.12it/s]

Episode 7401/8000, real env return = -114.92


 93%|██████████████████████████████████▎  | 7411/8000 [1:49:39<08:32,  1.15it/s]

Episode 7411/8000, real env return = -116.37


 93%|██████████████████████████████████▎  | 7421/8000 [1:49:48<08:23,  1.15it/s]

Episode 7421/8000, real env return = -115.57


 93%|██████████████████████████████████▎  | 7431/8000 [1:49:57<08:32,  1.11it/s]

Episode 7431/8000, real env return = -118.10


 93%|██████████████████████████████████▍  | 7441/8000 [1:50:06<08:18,  1.12it/s]

Episode 7441/8000, real env return = -115.91


 93%|██████████████████████████████████▍  | 7451/8000 [1:50:15<08:14,  1.11it/s]

Episode 7451/8000, real env return = -101.07


 93%|██████████████████████████████████▌  | 7461/8000 [1:50:24<07:52,  1.14it/s]

Episode 7461/8000, real env return = -101.06


 93%|██████████████████████████████████▌  | 7471/8000 [1:50:32<07:38,  1.15it/s]

Episode 7471/8000, real env return = -100.57


 94%|██████████████████████████████████▌  | 7481/8000 [1:50:41<07:43,  1.12it/s]

Episode 7481/8000, real env return = -101.82


 94%|██████████████████████████████████▋  | 7491/8000 [1:50:50<07:30,  1.13it/s]

Episode 7491/8000, real env return = -101.36


 94%|██████████████████████████████████▋  | 7501/8000 [1:50:59<07:19,  1.14it/s]

Episode 7501/8000, real env return = -102.14


 94%|██████████████████████████████████▋  | 7511/8000 [1:51:08<07:22,  1.10it/s]

Episode 7511/8000, real env return = -121.41


 94%|██████████████████████████████████▊  | 7521/8000 [1:51:17<07:05,  1.13it/s]

Episode 7521/8000, real env return = -118.64


 94%|██████████████████████████████████▊  | 7531/8000 [1:51:26<07:02,  1.11it/s]

Episode 7531/8000, real env return = -116.21


 94%|██████████████████████████████████▉  | 7541/8000 [1:51:36<07:06,  1.08it/s]

Episode 7541/8000, real env return = -109.68


 94%|██████████████████████████████████▉  | 7551/8000 [1:51:45<07:09,  1.05it/s]

Episode 7551/8000, real env return = -117.09


 95%|██████████████████████████████████▉  | 7561/8000 [1:51:54<06:43,  1.09it/s]

Episode 7561/8000, real env return = -117.64


 95%|███████████████████████████████████  | 7571/8000 [1:52:03<06:30,  1.10it/s]

Episode 7571/8000, real env return = -115.18


 95%|███████████████████████████████████  | 7581/8000 [1:52:12<06:13,  1.12it/s]

Episode 7581/8000, real env return = -104.56


 95%|███████████████████████████████████  | 7591/8000 [1:52:21<06:05,  1.12it/s]

Episode 7591/8000, real env return = -103.95


 95%|███████████████████████████████████▏ | 7601/8000 [1:52:30<06:00,  1.11it/s]

Episode 7601/8000, real env return = -103.58


 95%|███████████████████████████████████▏ | 7611/8000 [1:52:40<06:09,  1.05it/s]

Episode 7611/8000, real env return = -102.90


 95%|███████████████████████████████████▏ | 7621/8000 [1:52:49<05:37,  1.12it/s]

Episode 7621/8000, real env return = -102.91


 95%|███████████████████████████████████▎ | 7631/8000 [1:52:57<05:24,  1.14it/s]

Episode 7631/8000, real env return = -101.54


 96%|███████████████████████████████████▎ | 7641/8000 [1:53:07<05:26,  1.10it/s]

Episode 7641/8000, real env return = -101.56


 96%|███████████████████████████████████▍ | 7651/8000 [1:53:15<05:10,  1.12it/s]

Episode 7651/8000, real env return = -103.90


 96%|███████████████████████████████████▍ | 7661/8000 [1:53:24<04:55,  1.15it/s]

Episode 7661/8000, real env return = -101.56


 96%|███████████████████████████████████▍ | 7671/8000 [1:53:33<04:51,  1.13it/s]

Episode 7671/8000, real env return = -103.63


 96%|███████████████████████████████████▌ | 7681/8000 [1:53:42<04:40,  1.14it/s]

Episode 7681/8000, real env return = -103.44


 96%|███████████████████████████████████▌ | 7691/8000 [1:53:51<04:29,  1.15it/s]

Episode 7691/8000, real env return = -102.33


 96%|███████████████████████████████████▌ | 7701/8000 [1:54:00<04:27,  1.12it/s]

Episode 7701/8000, real env return = -117.10


 96%|███████████████████████████████████▋ | 7711/8000 [1:54:08<04:13,  1.14it/s]

Episode 7711/8000, real env return = -116.79


 97%|███████████████████████████████████▋ | 7721/8000 [1:54:17<04:02,  1.15it/s]

Episode 7721/8000, real env return = -117.38


 97%|███████████████████████████████████▊ | 7731/8000 [1:54:26<03:57,  1.13it/s]

Episode 7731/8000, real env return = -116.72


 97%|███████████████████████████████████▊ | 7741/8000 [1:54:35<03:49,  1.13it/s]

Episode 7741/8000, real env return = -117.77


 97%|███████████████████████████████████▊ | 7751/8000 [1:54:44<03:37,  1.14it/s]

Episode 7751/8000, real env return = -117.78


 97%|███████████████████████████████████▉ | 7761/8000 [1:54:52<03:29,  1.14it/s]

Episode 7761/8000, real env return = -116.68


 97%|███████████████████████████████████▉ | 7771/8000 [1:55:01<03:22,  1.13it/s]

Episode 7771/8000, real env return = -116.77


 97%|███████████████████████████████████▉ | 7781/8000 [1:55:10<03:15,  1.12it/s]

Episode 7781/8000, real env return = -116.43


 97%|████████████████████████████████████ | 7791/8000 [1:55:19<03:03,  1.14it/s]

Episode 7791/8000, real env return = -101.67


 98%|████████████████████████████████████ | 7801/8000 [1:55:28<02:58,  1.12it/s]

Episode 7801/8000, real env return = -102.18


 98%|████████████████████████████████████▏| 7811/8000 [1:55:37<02:47,  1.13it/s]

Episode 7811/8000, real env return = -102.55


 98%|████████████████████████████████████▏| 7821/8000 [1:55:45<02:37,  1.14it/s]

Episode 7821/8000, real env return = -102.44


 98%|████████████████████████████████████▏| 7831/8000 [1:55:54<02:29,  1.13it/s]

Episode 7831/8000, real env return = -101.92


 98%|████████████████████████████████████▎| 7841/8000 [1:56:03<02:21,  1.12it/s]

Episode 7841/8000, real env return = -102.21


 98%|████████████████████████████████████▎| 7851/8000 [1:56:12<02:13,  1.12it/s]

Episode 7851/8000, real env return = -102.47


 98%|████████████████████████████████████▎| 7861/8000 [1:56:21<02:04,  1.11it/s]

Episode 7861/8000, real env return = -102.39


 98%|████████████████████████████████████▍| 7871/8000 [1:56:30<01:57,  1.10it/s]

Episode 7871/8000, real env return = -103.08


 99%|████████████████████████████████████▍| 7881/8000 [1:56:38<01:43,  1.15it/s]

Episode 7881/8000, real env return = -102.41


 99%|████████████████████████████████████▍| 7891/8000 [1:56:47<01:38,  1.11it/s]

Episode 7891/8000, real env return = -103.49


 99%|████████████████████████████████████▌| 7901/8000 [1:56:56<01:28,  1.12it/s]

Episode 7901/8000, real env return = -102.96


 99%|████████████████████████████████████▌| 7911/8000 [1:57:05<01:19,  1.12it/s]

Episode 7911/8000, real env return = -102.84


 99%|████████████████████████████████████▋| 7921/8000 [1:57:14<01:09,  1.14it/s]

Episode 7921/8000, real env return = -102.09


 99%|████████████████████████████████████▋| 7931/8000 [1:57:23<01:00,  1.13it/s]

Episode 7931/8000, real env return = -102.54


 99%|████████████████████████████████████▋| 7941/8000 [1:57:31<00:50,  1.16it/s]

Episode 7941/8000, real env return = -101.36


 99%|████████████████████████████████████▊| 7951/8000 [1:57:40<00:43,  1.13it/s]

Episode 7951/8000, real env return = -102.37


100%|████████████████████████████████████▊| 7961/8000 [1:57:49<00:34,  1.13it/s]

Episode 7961/8000, real env return = -101.88


100%|████████████████████████████████████▊| 7971/8000 [1:57:58<00:25,  1.12it/s]

Episode 7971/8000, real env return = -102.61


100%|████████████████████████████████████▉| 7981/8000 [1:58:07<00:16,  1.13it/s]

Episode 7981/8000, real env return = -102.69


100%|████████████████████████████████████▉| 7991/8000 [1:58:16<00:07,  1.15it/s]

Episode 7991/8000, real env return = -102.63


100%|█████████████████████████████████████| 8000/8000 [1:58:24<00:00,  1.13it/s]

Training finished.





In [5]:
train3 = main()

Using cuda device


  0%|                                                  | 0/8000 [00:00<?, ?it/s]

Episode 1/8000, real env return = -103.57


  0%|                                       | 11/8000 [00:12<3:43:07,  1.68s/it]

Episode 11/8000, real env return = -94.72


  0%|                                       | 21/8000 [00:28<3:09:28,  1.42s/it]

Episode 21/8000, real env return = -105.77


  0%|▏                                      | 31/8000 [00:41<2:58:59,  1.35s/it]

Episode 31/8000, real env return = -110.96


  1%|▏                                      | 41/8000 [00:51<2:07:23,  1.04it/s]

Episode 41/8000, real env return = -111.72


  1%|▏                                      | 51/8000 [01:00<2:02:43,  1.08it/s]

Episode 51/8000, real env return = -104.67


  1%|▎                                      | 61/8000 [01:15<3:29:18,  1.58s/it]

Episode 61/8000, real env return = -85.53


  1%|▎                                      | 71/8000 [01:28<2:22:54,  1.08s/it]

Episode 71/8000, real env return = -111.88


  1%|▍                                      | 81/8000 [01:47<4:16:34,  1.94s/it]

Episode 81/8000, real env return = -118.70


  1%|▍                                      | 91/8000 [02:07<4:29:37,  2.05s/it]

Episode 91/8000, real env return = -17.90


  1%|▍                                     | 101/8000 [02:21<2:36:23,  1.19s/it]

Episode 101/8000, real env return = -115.85


  1%|▌                                     | 111/8000 [02:32<2:08:19,  1.02it/s]

Episode 111/8000, real env return = -107.99


  2%|▌                                     | 121/8000 [02:42<2:06:45,  1.04it/s]

Episode 121/8000, real env return = -107.50


  2%|▌                                     | 131/8000 [02:57<4:01:11,  1.84s/it]

Episode 131/8000, real env return = -84.05


  2%|▋                                     | 141/8000 [03:19<4:37:46,  2.12s/it]

Episode 141/8000, real env return = -90.55


  2%|▋                                     | 151/8000 [03:39<4:05:53,  1.88s/it]

Episode 151/8000, real env return = -149.55


  2%|▊                                     | 161/8000 [03:51<2:10:05,  1.00it/s]

Episode 161/8000, real env return = -111.50


  2%|▊                                     | 171/8000 [04:07<3:42:08,  1.70s/it]

Episode 171/8000, real env return = -114.32


  2%|▊                                     | 181/8000 [04:21<4:00:39,  1.85s/it]

Episode 181/8000, real env return = -89.83


  2%|▉                                     | 191/8000 [04:38<3:13:21,  1.49s/it]

Episode 191/8000, real env return = -125.11


  3%|▉                                     | 201/8000 [04:54<3:38:41,  1.68s/it]

Episode 201/8000, real env return = -85.01


  3%|█                                     | 211/8000 [05:04<2:09:58,  1.00s/it]

Episode 211/8000, real env return = -109.36


  3%|█                                     | 221/8000 [05:16<2:29:31,  1.15s/it]

Episode 221/8000, real env return = -150.74


  3%|█                                     | 231/8000 [05:25<2:05:54,  1.03it/s]

Episode 231/8000, real env return = -111.57


  3%|█▏                                    | 241/8000 [05:34<2:01:43,  1.06it/s]

Episode 241/8000, real env return = -112.34


  3%|█▏                                    | 251/8000 [05:44<1:58:00,  1.09it/s]

Episode 251/8000, real env return = -111.39


  3%|█▏                                    | 261/8000 [05:53<1:59:11,  1.08it/s]

Episode 261/8000, real env return = -111.85


  3%|█▎                                    | 271/8000 [06:02<1:56:27,  1.11it/s]

Episode 271/8000, real env return = -111.36


  4%|█▎                                    | 281/8000 [06:11<1:55:44,  1.11it/s]

Episode 281/8000, real env return = -109.05


  4%|█▍                                    | 291/8000 [06:20<1:54:43,  1.12it/s]

Episode 291/8000, real env return = -107.90


  4%|█▍                                    | 301/8000 [06:29<1:54:19,  1.12it/s]

Episode 301/8000, real env return = -108.11


  4%|█▍                                    | 311/8000 [06:38<1:54:40,  1.12it/s]

Episode 311/8000, real env return = -106.89


  4%|█▌                                    | 321/8000 [06:47<1:55:05,  1.11it/s]

Episode 321/8000, real env return = -107.90


  4%|█▌                                    | 331/8000 [06:56<1:54:55,  1.11it/s]

Episode 331/8000, real env return = -106.30


  4%|█▌                                    | 341/8000 [07:05<1:55:48,  1.10it/s]

Episode 341/8000, real env return = -106.63


  4%|█▋                                    | 351/8000 [07:14<1:54:04,  1.12it/s]

Episode 351/8000, real env return = -105.34


  5%|█▋                                    | 361/8000 [07:23<1:55:29,  1.10it/s]

Episode 361/8000, real env return = -105.10


  5%|█▊                                    | 371/8000 [07:32<1:51:18,  1.14it/s]

Episode 371/8000, real env return = -103.79


  5%|█▊                                    | 381/8000 [07:40<1:50:49,  1.15it/s]

Episode 381/8000, real env return = -104.16


  5%|█▊                                    | 391/8000 [07:49<1:52:35,  1.13it/s]

Episode 391/8000, real env return = -103.83


  5%|█▉                                    | 401/8000 [07:58<1:53:03,  1.12it/s]

Episode 401/8000, real env return = -105.95


  5%|█▉                                    | 411/8000 [08:07<1:54:19,  1.11it/s]

Episode 411/8000, real env return = -107.60


  5%|█▉                                    | 421/8000 [08:16<1:53:35,  1.11it/s]

Episode 421/8000, real env return = -110.21


  5%|██                                    | 431/8000 [08:25<1:53:41,  1.11it/s]

Episode 431/8000, real env return = -105.31


  6%|██                                    | 441/8000 [08:34<1:51:35,  1.13it/s]

Episode 441/8000, real env return = -105.64


  6%|██▏                                   | 451/8000 [08:43<1:51:52,  1.12it/s]

Episode 451/8000, real env return = -104.55


  6%|██▏                                   | 461/8000 [08:52<1:49:34,  1.15it/s]

Episode 461/8000, real env return = -103.56


  6%|██▏                                   | 471/8000 [09:01<1:51:55,  1.12it/s]

Episode 471/8000, real env return = -104.57


  6%|██▎                                   | 481/8000 [09:09<1:52:29,  1.11it/s]

Episode 481/8000, real env return = -104.50


  6%|██▎                                   | 491/8000 [09:18<1:51:08,  1.13it/s]

Episode 491/8000, real env return = -103.89


  6%|██▍                                   | 501/8000 [09:27<1:50:55,  1.13it/s]

Episode 501/8000, real env return = -104.83


  6%|██▍                                   | 511/8000 [09:36<1:51:24,  1.12it/s]

Episode 511/8000, real env return = -103.06


  7%|██▍                                   | 521/8000 [09:45<1:49:43,  1.14it/s]

Episode 521/8000, real env return = -103.59


  7%|██▌                                   | 531/8000 [09:54<1:49:20,  1.14it/s]

Episode 531/8000, real env return = -103.26


  7%|██▌                                   | 541/8000 [10:03<1:50:39,  1.12it/s]

Episode 541/8000, real env return = -105.33


  7%|██▌                                   | 551/8000 [10:11<1:49:59,  1.13it/s]

Episode 551/8000, real env return = -104.56


  7%|██▋                                   | 561/8000 [10:20<1:51:06,  1.12it/s]

Episode 561/8000, real env return = -105.40


  7%|██▋                                   | 571/8000 [10:29<1:48:58,  1.14it/s]

Episode 571/8000, real env return = -103.56


  7%|██▊                                   | 581/8000 [10:38<1:50:11,  1.12it/s]

Episode 581/8000, real env return = -104.71


  7%|██▊                                   | 591/8000 [10:47<1:49:57,  1.12it/s]

Episode 591/8000, real env return = -105.44


  8%|██▊                                   | 601/8000 [10:56<1:50:25,  1.12it/s]

Episode 601/8000, real env return = -104.35


  8%|██▉                                   | 611/8000 [11:04<1:48:04,  1.14it/s]

Episode 611/8000, real env return = -107.78


  8%|██▉                                   | 621/8000 [11:13<1:48:39,  1.13it/s]

Episode 621/8000, real env return = -104.85


  8%|██▉                                   | 631/8000 [11:22<1:47:40,  1.14it/s]

Episode 631/8000, real env return = -104.77


  8%|███                                   | 641/8000 [11:31<1:48:30,  1.13it/s]

Episode 641/8000, real env return = -106.68


  8%|███                                   | 651/8000 [11:40<1:49:07,  1.12it/s]

Episode 651/8000, real env return = -106.22


  8%|███▏                                  | 661/8000 [11:49<1:48:18,  1.13it/s]

Episode 661/8000, real env return = -106.97


  8%|███▏                                  | 671/8000 [11:58<1:50:46,  1.10it/s]

Episode 671/8000, real env return = -104.51


  9%|███▏                                  | 681/8000 [12:07<1:48:11,  1.13it/s]

Episode 681/8000, real env return = -105.61


  9%|███▎                                  | 691/8000 [12:16<1:45:58,  1.15it/s]

Episode 691/8000, real env return = -105.20


  9%|███▎                                  | 701/8000 [12:25<1:46:04,  1.15it/s]

Episode 701/8000, real env return = -105.75


  9%|███▍                                  | 711/8000 [12:33<1:46:09,  1.14it/s]

Episode 711/8000, real env return = -106.99


  9%|███▍                                  | 721/8000 [12:42<1:47:40,  1.13it/s]

Episode 721/8000, real env return = -105.16


  9%|███▍                                  | 731/8000 [12:51<1:48:01,  1.12it/s]

Episode 731/8000, real env return = -104.87


  9%|███▌                                  | 741/8000 [13:00<1:46:25,  1.14it/s]

Episode 741/8000, real env return = -104.15


  9%|███▌                                  | 751/8000 [13:09<1:48:23,  1.11it/s]

Episode 751/8000, real env return = -113.09


 10%|███▌                                  | 761/8000 [13:18<1:46:24,  1.13it/s]

Episode 761/8000, real env return = -105.46


 10%|███▋                                  | 771/8000 [13:27<1:46:06,  1.14it/s]

Episode 771/8000, real env return = -104.55


 10%|███▋                                  | 781/8000 [13:36<1:48:26,  1.11it/s]

Episode 781/8000, real env return = -104.77


 10%|███▊                                  | 791/8000 [13:44<1:39:01,  1.21it/s]

Episode 791/8000, real env return = -105.30


 10%|███▊                                  | 801/8000 [13:53<1:41:18,  1.18it/s]

Episode 801/8000, real env return = -107.24


 10%|███▊                                  | 811/8000 [14:01<1:44:59,  1.14it/s]

Episode 811/8000, real env return = -104.47


 10%|███▉                                  | 821/8000 [14:10<1:45:56,  1.13it/s]

Episode 821/8000, real env return = -105.10


 10%|███▉                                  | 831/8000 [14:19<1:44:12,  1.15it/s]

Episode 831/8000, real env return = -105.52


 11%|███▉                                  | 841/8000 [14:27<1:40:57,  1.18it/s]

Episode 841/8000, real env return = -105.44


 11%|████                                  | 851/8000 [14:36<1:41:07,  1.18it/s]

Episode 851/8000, real env return = -104.97


 11%|████                                  | 861/8000 [14:45<1:45:18,  1.13it/s]

Episode 861/8000, real env return = -104.01


 11%|████▏                                 | 871/8000 [14:54<1:45:31,  1.13it/s]

Episode 871/8000, real env return = -105.09


 11%|████▏                                 | 881/8000 [15:02<1:43:48,  1.14it/s]

Episode 881/8000, real env return = -103.88


 11%|████▏                                 | 891/8000 [15:11<1:45:46,  1.12it/s]

Episode 891/8000, real env return = -105.40


 11%|████▎                                 | 901/8000 [15:20<1:45:37,  1.12it/s]

Episode 901/8000, real env return = -106.56


 11%|████▎                                 | 911/8000 [15:29<1:45:58,  1.11it/s]

Episode 911/8000, real env return = -104.47


 12%|████▎                                 | 921/8000 [15:38<1:44:46,  1.13it/s]

Episode 921/8000, real env return = -106.10


 12%|████▍                                 | 931/8000 [15:47<1:43:31,  1.14it/s]

Episode 931/8000, real env return = -106.87


 12%|████▍                                 | 941/8000 [15:56<1:42:24,  1.15it/s]

Episode 941/8000, real env return = -104.61


 12%|████▌                                 | 951/8000 [16:05<1:44:08,  1.13it/s]

Episode 951/8000, real env return = -107.22


 12%|████▌                                 | 961/8000 [16:13<1:43:20,  1.14it/s]

Episode 961/8000, real env return = -106.41


 12%|████▌                                 | 971/8000 [16:22<1:44:35,  1.12it/s]

Episode 971/8000, real env return = -104.80


 12%|████▋                                 | 981/8000 [16:31<1:44:59,  1.11it/s]

Episode 981/8000, real env return = -106.25


 12%|████▋                                 | 991/8000 [16:40<1:44:09,  1.12it/s]

Episode 991/8000, real env return = -104.28


 13%|████▋                                | 1001/8000 [16:49<1:42:48,  1.13it/s]

Episode 1001/8000, real env return = -108.73


 13%|████▋                                | 1011/8000 [16:58<1:43:07,  1.13it/s]

Episode 1011/8000, real env return = -104.35


 13%|████▋                                | 1021/8000 [17:07<1:44:35,  1.11it/s]

Episode 1021/8000, real env return = -105.58


 13%|████▊                                | 1031/8000 [17:15<1:42:11,  1.14it/s]

Episode 1031/8000, real env return = -104.25


 13%|████▊                                | 1041/8000 [17:24<1:42:16,  1.13it/s]

Episode 1041/8000, real env return = -106.68


 13%|████▊                                | 1051/8000 [17:33<1:41:37,  1.14it/s]

Episode 1051/8000, real env return = -105.44


 13%|████▉                                | 1061/8000 [17:42<1:42:59,  1.12it/s]

Episode 1061/8000, real env return = -107.36


 13%|████▉                                | 1071/8000 [17:51<1:42:02,  1.13it/s]

Episode 1071/8000, real env return = -104.85


 14%|████▉                                | 1081/8000 [18:00<1:41:46,  1.13it/s]

Episode 1081/8000, real env return = -108.52


 14%|█████                                | 1091/8000 [18:09<1:40:35,  1.14it/s]

Episode 1091/8000, real env return = -107.14


 14%|█████                                | 1101/8000 [18:17<1:44:06,  1.10it/s]

Episode 1101/8000, real env return = -105.52


 14%|█████▏                               | 1111/8000 [18:26<1:40:20,  1.14it/s]

Episode 1111/8000, real env return = -105.98


 14%|█████▏                               | 1121/8000 [18:35<1:41:42,  1.13it/s]

Episode 1121/8000, real env return = -105.71


 14%|█████▏                               | 1131/8000 [18:44<1:43:07,  1.11it/s]

Episode 1131/8000, real env return = -105.97


 14%|█████▎                               | 1141/8000 [18:53<1:41:35,  1.13it/s]

Episode 1141/8000, real env return = -106.40


 14%|█████▎                               | 1151/8000 [19:02<1:38:25,  1.16it/s]

Episode 1151/8000, real env return = -103.35


 15%|█████▎                               | 1161/8000 [19:11<1:38:57,  1.15it/s]

Episode 1161/8000, real env return = -105.44


 15%|█████▍                               | 1171/8000 [19:20<1:40:46,  1.13it/s]

Episode 1171/8000, real env return = -105.50


 15%|█████▍                               | 1181/8000 [19:28<1:40:10,  1.13it/s]

Episode 1181/8000, real env return = -105.33


 15%|█████▌                               | 1191/8000 [19:37<1:40:25,  1.13it/s]

Episode 1191/8000, real env return = -107.21


 15%|█████▌                               | 1201/8000 [19:46<1:37:34,  1.16it/s]

Episode 1201/8000, real env return = -104.74


 15%|█████▌                               | 1211/8000 [19:55<1:37:46,  1.16it/s]

Episode 1211/8000, real env return = -105.46


 15%|█████▋                               | 1221/8000 [20:03<1:35:14,  1.19it/s]

Episode 1221/8000, real env return = -105.91


 15%|█████▋                               | 1231/8000 [20:12<1:39:57,  1.13it/s]

Episode 1231/8000, real env return = -104.97


 16%|█████▋                               | 1241/8000 [20:21<1:40:40,  1.12it/s]

Episode 1241/8000, real env return = -104.69


 16%|█████▊                               | 1251/8000 [20:30<1:37:34,  1.15it/s]

Episode 1251/8000, real env return = -106.36


 16%|█████▊                               | 1261/8000 [20:39<1:40:18,  1.12it/s]

Episode 1261/8000, real env return = -105.96


 16%|█████▉                               | 1271/8000 [20:47<1:39:30,  1.13it/s]

Episode 1271/8000, real env return = -107.01


 16%|█████▉                               | 1281/8000 [20:56<1:38:47,  1.13it/s]

Episode 1281/8000, real env return = -105.03


 16%|█████▉                               | 1291/8000 [21:05<1:39:43,  1.12it/s]

Episode 1291/8000, real env return = -106.82


 16%|██████                               | 1301/8000 [21:14<1:36:50,  1.15it/s]

Episode 1301/8000, real env return = -105.96


 16%|██████                               | 1311/8000 [21:23<1:37:14,  1.15it/s]

Episode 1311/8000, real env return = -107.25


 17%|██████                               | 1321/8000 [21:32<1:39:55,  1.11it/s]

Episode 1321/8000, real env return = -109.67


 17%|██████▏                              | 1331/8000 [21:41<1:40:59,  1.10it/s]

Episode 1331/8000, real env return = -109.77


 17%|██████▏                              | 1341/8000 [21:49<1:37:40,  1.14it/s]

Episode 1341/8000, real env return = -110.05


 17%|██████▏                              | 1351/8000 [21:58<1:37:26,  1.14it/s]

Episode 1351/8000, real env return = -105.32


 17%|██████▎                              | 1361/8000 [22:07<1:37:35,  1.13it/s]

Episode 1361/8000, real env return = -104.66


 17%|██████▎                              | 1371/8000 [22:16<1:38:30,  1.12it/s]

Episode 1371/8000, real env return = -104.90


 17%|██████▍                              | 1381/8000 [22:25<1:36:15,  1.15it/s]

Episode 1381/8000, real env return = -104.40


 17%|██████▍                              | 1391/8000 [22:34<1:37:23,  1.13it/s]

Episode 1391/8000, real env return = -104.46


 18%|██████▍                              | 1401/8000 [22:43<1:37:37,  1.13it/s]

Episode 1401/8000, real env return = -104.14


 18%|██████▌                              | 1411/8000 [22:51<1:36:44,  1.14it/s]

Episode 1411/8000, real env return = -104.11


 18%|██████▌                              | 1421/8000 [23:00<1:33:58,  1.17it/s]

Episode 1421/8000, real env return = -104.12


 18%|██████▌                              | 1431/8000 [23:09<1:36:09,  1.14it/s]

Episode 1431/8000, real env return = -105.51


 18%|██████▋                              | 1441/8000 [23:18<1:35:18,  1.15it/s]

Episode 1441/8000, real env return = -103.17


 18%|██████▋                              | 1451/8000 [23:27<1:36:27,  1.13it/s]

Episode 1451/8000, real env return = -104.05


 18%|██████▊                              | 1461/8000 [23:35<1:36:52,  1.13it/s]

Episode 1461/8000, real env return = -104.72


 18%|██████▊                              | 1471/8000 [23:44<1:33:35,  1.16it/s]

Episode 1471/8000, real env return = -104.61


 19%|██████▊                              | 1481/8000 [23:53<1:36:44,  1.12it/s]

Episode 1481/8000, real env return = -104.89


 19%|██████▉                              | 1491/8000 [24:02<1:36:10,  1.13it/s]

Episode 1491/8000, real env return = -105.08


 19%|██████▉                              | 1501/8000 [24:11<1:35:16,  1.14it/s]

Episode 1501/8000, real env return = -104.31


 19%|██████▉                              | 1511/8000 [24:19<1:35:05,  1.14it/s]

Episode 1511/8000, real env return = -104.31


 19%|███████                              | 1521/8000 [24:28<1:36:16,  1.12it/s]

Episode 1521/8000, real env return = -100.90


 19%|███████                              | 1531/8000 [24:37<1:36:52,  1.11it/s]

Episode 1531/8000, real env return = -107.01


 19%|███████▏                             | 1541/8000 [24:46<1:37:21,  1.11it/s]

Episode 1541/8000, real env return = -103.26


 19%|███████▏                             | 1551/8000 [24:55<1:42:18,  1.05it/s]

Episode 1551/8000, real env return = -103.90


 20%|███████▏                             | 1561/8000 [25:04<1:36:36,  1.11it/s]

Episode 1561/8000, real env return = -104.31


 20%|███████▎                             | 1571/8000 [25:13<1:36:23,  1.11it/s]

Episode 1571/8000, real env return = -105.42


 20%|███████▎                             | 1581/8000 [25:22<1:34:13,  1.14it/s]

Episode 1581/8000, real env return = -106.01


 20%|███████▎                             | 1591/8000 [25:32<1:33:49,  1.14it/s]

Episode 1591/8000, real env return = -106.02


 20%|███████▍                             | 1601/8000 [25:41<1:35:00,  1.12it/s]

Episode 1601/8000, real env return = -106.39


 20%|███████▍                             | 1611/8000 [25:49<1:33:16,  1.14it/s]

Episode 1611/8000, real env return = -107.59


 20%|███████▍                             | 1621/8000 [25:59<1:39:13,  1.07it/s]

Episode 1621/8000, real env return = -136.81


 20%|███████▌                             | 1631/8000 [26:08<1:36:23,  1.10it/s]

Episode 1631/8000, real env return = -107.35


 21%|███████▌                             | 1641/8000 [26:17<1:34:48,  1.12it/s]

Episode 1641/8000, real env return = -104.71


 21%|███████▋                             | 1651/8000 [26:26<1:31:53,  1.15it/s]

Episode 1651/8000, real env return = -105.05


 21%|███████▋                             | 1661/8000 [26:34<1:30:00,  1.17it/s]

Episode 1661/8000, real env return = -105.61


 21%|███████▋                             | 1671/8000 [26:43<1:33:11,  1.13it/s]

Episode 1671/8000, real env return = -113.58


 21%|███████▊                             | 1681/8000 [26:52<1:33:03,  1.13it/s]

Episode 1681/8000, real env return = -106.77


 21%|███████▊                             | 1691/8000 [27:01<1:31:39,  1.15it/s]

Episode 1691/8000, real env return = -112.64


 21%|███████▊                             | 1701/8000 [27:09<1:31:32,  1.15it/s]

Episode 1701/8000, real env return = -113.34


 21%|███████▉                             | 1711/8000 [27:18<1:30:44,  1.16it/s]

Episode 1711/8000, real env return = -109.69


 22%|███████▉                             | 1721/8000 [27:27<1:29:47,  1.17it/s]

Episode 1721/8000, real env return = -108.56


 22%|████████                             | 1731/8000 [27:36<1:29:23,  1.17it/s]

Episode 1731/8000, real env return = -111.56


 22%|████████                             | 1741/8000 [27:44<1:32:34,  1.13it/s]

Episode 1741/8000, real env return = -110.82


 22%|████████                             | 1751/8000 [27:53<1:32:07,  1.13it/s]

Episode 1751/8000, real env return = -109.61


 22%|████████▏                            | 1761/8000 [28:02<1:31:38,  1.13it/s]

Episode 1761/8000, real env return = -110.05


 22%|████████▏                            | 1771/8000 [28:11<1:32:41,  1.12it/s]

Episode 1771/8000, real env return = -109.35


 22%|████████▏                            | 1781/8000 [28:20<1:31:50,  1.13it/s]

Episode 1781/8000, real env return = -109.17


 22%|████████▎                            | 1791/8000 [28:29<1:31:25,  1.13it/s]

Episode 1791/8000, real env return = -108.63


 23%|████████▎                            | 1801/8000 [28:37<1:29:39,  1.15it/s]

Episode 1801/8000, real env return = -109.59


 23%|████████▍                            | 1811/8000 [28:46<1:30:58,  1.13it/s]

Episode 1811/8000, real env return = -111.94


 23%|████████▍                            | 1821/8000 [28:55<1:31:27,  1.13it/s]

Episode 1821/8000, real env return = -109.50


 23%|████████▍                            | 1831/8000 [29:04<1:30:22,  1.14it/s]

Episode 1831/8000, real env return = -111.18


 23%|████████▌                            | 1841/8000 [29:13<1:28:47,  1.16it/s]

Episode 1841/8000, real env return = -109.36


 23%|████████▌                            | 1851/8000 [29:21<1:31:06,  1.12it/s]

Episode 1851/8000, real env return = -108.44


 23%|████████▌                            | 1861/8000 [29:30<1:30:28,  1.13it/s]

Episode 1861/8000, real env return = -108.32


 23%|████████▋                            | 1871/8000 [29:39<1:29:25,  1.14it/s]

Episode 1871/8000, real env return = -107.97


 24%|████████▋                            | 1881/8000 [29:48<1:30:17,  1.13it/s]

Episode 1881/8000, real env return = -108.10


 24%|████████▋                            | 1891/8000 [29:56<1:28:55,  1.14it/s]

Episode 1891/8000, real env return = -109.34


 24%|████████▊                            | 1901/8000 [30:05<1:30:08,  1.13it/s]

Episode 1901/8000, real env return = -109.17


 24%|████████▊                            | 1911/8000 [30:14<1:27:03,  1.17it/s]

Episode 1911/8000, real env return = -107.32


 24%|████████▉                            | 1921/8000 [30:23<1:28:39,  1.14it/s]

Episode 1921/8000, real env return = -109.06


 24%|████████▉                            | 1931/8000 [30:31<1:24:13,  1.20it/s]

Episode 1931/8000, real env return = -109.16


 24%|████████▉                            | 1941/8000 [30:40<1:29:57,  1.12it/s]

Episode 1941/8000, real env return = -106.28


 24%|█████████                            | 1951/8000 [30:48<1:24:19,  1.20it/s]

Episode 1951/8000, real env return = -107.59


 25%|█████████                            | 1961/8000 [30:56<1:24:16,  1.19it/s]

Episode 1961/8000, real env return = -107.81


 25%|█████████                            | 1971/8000 [31:05<1:22:05,  1.22it/s]

Episode 1971/8000, real env return = -107.45


 25%|█████████▏                           | 1981/8000 [31:13<1:29:14,  1.12it/s]

Episode 1981/8000, real env return = -106.22


 25%|█████████▏                           | 1991/8000 [31:22<1:26:26,  1.16it/s]

Episode 1991/8000, real env return = -108.78


 25%|█████████▎                           | 2001/8000 [31:31<1:27:20,  1.14it/s]

Episode 2001/8000, real env return = -108.72


 25%|█████████▎                           | 2011/8000 [31:39<1:23:19,  1.20it/s]

Episode 2011/8000, real env return = -108.95


 25%|█████████▎                           | 2021/8000 [31:48<1:23:39,  1.19it/s]

Episode 2021/8000, real env return = -108.47


 25%|█████████▍                           | 2031/8000 [31:56<1:24:35,  1.18it/s]

Episode 2031/8000, real env return = -110.10


 26%|█████████▍                           | 2041/8000 [32:05<1:23:29,  1.19it/s]

Episode 2041/8000, real env return = -110.32


 26%|█████████▍                           | 2051/8000 [32:13<1:26:40,  1.14it/s]

Episode 2051/8000, real env return = -109.04


 26%|█████████▌                           | 2061/8000 [32:22<1:22:49,  1.20it/s]

Episode 2061/8000, real env return = -109.89


 26%|█████████▌                           | 2071/8000 [32:30<1:22:09,  1.20it/s]

Episode 2071/8000, real env return = -108.25


 26%|█████████▌                           | 2081/8000 [32:39<1:27:06,  1.13it/s]

Episode 2081/8000, real env return = -108.14


 26%|█████████▋                           | 2091/8000 [32:47<1:24:33,  1.16it/s]

Episode 2091/8000, real env return = -107.34


 26%|█████████▋                           | 2101/8000 [32:56<1:22:16,  1.19it/s]

Episode 2101/8000, real env return = -108.98


 26%|█████████▊                           | 2111/8000 [33:04<1:23:49,  1.17it/s]

Episode 2111/8000, real env return = -108.99


 27%|█████████▊                           | 2121/8000 [33:13<1:22:23,  1.19it/s]

Episode 2121/8000, real env return = -108.07


 27%|█████████▊                           | 2131/8000 [33:21<1:25:34,  1.14it/s]

Episode 2131/8000, real env return = -107.05


 27%|█████████▉                           | 2141/8000 [33:30<1:21:36,  1.20it/s]

Episode 2141/8000, real env return = -107.80


 27%|█████████▉                           | 2151/8000 [33:38<1:25:35,  1.14it/s]

Episode 2151/8000, real env return = -108.58


 27%|█████████▉                           | 2161/8000 [33:47<1:21:41,  1.19it/s]

Episode 2161/8000, real env return = -107.78


 27%|██████████                           | 2171/8000 [33:55<1:25:36,  1.13it/s]

Episode 2171/8000, real env return = -108.68


 27%|██████████                           | 2181/8000 [34:04<1:24:52,  1.14it/s]

Episode 2181/8000, real env return = -107.52


 27%|██████████▏                          | 2191/8000 [34:12<1:22:58,  1.17it/s]

Episode 2191/8000, real env return = -108.23


 28%|██████████▏                          | 2201/8000 [34:21<1:20:35,  1.20it/s]

Episode 2201/8000, real env return = -107.78


 28%|██████████▏                          | 2211/8000 [34:29<1:21:55,  1.18it/s]

Episode 2211/8000, real env return = -107.50


 28%|██████████▎                          | 2221/8000 [34:38<1:23:15,  1.16it/s]

Episode 2221/8000, real env return = -108.57


 28%|██████████▎                          | 2231/8000 [34:46<1:19:53,  1.20it/s]

Episode 2231/8000, real env return = -109.57


 28%|██████████▎                          | 2241/8000 [34:55<1:20:01,  1.20it/s]

Episode 2241/8000, real env return = -107.55


 28%|██████████▍                          | 2251/8000 [35:03<1:23:34,  1.15it/s]

Episode 2251/8000, real env return = -108.33


 28%|██████████▍                          | 2261/8000 [35:12<1:22:34,  1.16it/s]

Episode 2261/8000, real env return = -112.17


 28%|██████████▌                          | 2271/8000 [35:20<1:20:48,  1.18it/s]

Episode 2271/8000, real env return = -111.89


 29%|██████████▌                          | 2281/8000 [35:29<1:19:17,  1.20it/s]

Episode 2281/8000, real env return = -106.94


 29%|██████████▌                          | 2291/8000 [35:37<1:19:09,  1.20it/s]

Episode 2291/8000, real env return = -107.62


 29%|██████████▋                          | 2301/8000 [35:46<1:22:48,  1.15it/s]

Episode 2301/8000, real env return = -105.78


 29%|██████████▋                          | 2311/8000 [35:54<1:19:15,  1.20it/s]

Episode 2311/8000, real env return = -106.13


 29%|██████████▋                          | 2321/8000 [36:03<1:24:04,  1.13it/s]

Episode 2321/8000, real env return = -111.32


 29%|██████████▊                          | 2331/8000 [36:11<1:20:23,  1.18it/s]

Episode 2331/8000, real env return = -107.18


 29%|██████████▊                          | 2341/8000 [36:20<1:19:12,  1.19it/s]

Episode 2341/8000, real env return = -108.99


 29%|██████████▊                          | 2351/8000 [36:28<1:21:53,  1.15it/s]

Episode 2351/8000, real env return = -108.54


 30%|██████████▉                          | 2361/8000 [36:37<1:19:53,  1.18it/s]

Episode 2361/8000, real env return = -107.73


 30%|██████████▉                          | 2371/8000 [36:45<1:18:34,  1.19it/s]

Episode 2371/8000, real env return = -107.78


 30%|███████████                          | 2381/8000 [36:53<1:19:47,  1.17it/s]

Episode 2381/8000, real env return = -108.80


 30%|███████████                          | 2391/8000 [37:02<1:18:24,  1.19it/s]

Episode 2391/8000, real env return = -107.39


 30%|███████████                          | 2401/8000 [37:10<1:17:14,  1.21it/s]

Episode 2401/8000, real env return = -109.15


 30%|███████████▏                         | 2411/8000 [37:19<1:18:24,  1.19it/s]

Episode 2411/8000, real env return = -109.81


 30%|███████████▏                         | 2421/8000 [37:28<1:20:03,  1.16it/s]

Episode 2421/8000, real env return = -106.78


 30%|███████████▏                         | 2431/8000 [37:36<1:17:30,  1.20it/s]

Episode 2431/8000, real env return = -107.49


 31%|███████████▎                         | 2441/8000 [37:44<1:19:10,  1.17it/s]

Episode 2441/8000, real env return = -107.94


 31%|███████████▎                         | 2451/8000 [37:53<1:21:12,  1.14it/s]

Episode 2451/8000, real env return = -103.75


 31%|███████████▍                         | 2461/8000 [38:01<1:18:21,  1.18it/s]

Episode 2461/8000, real env return = -104.44


 31%|███████████▍                         | 2471/8000 [38:10<1:17:52,  1.18it/s]

Episode 2471/8000, real env return = -113.13


 31%|███████████▍                         | 2481/8000 [38:19<1:17:13,  1.19it/s]

Episode 2481/8000, real env return = -110.15


 31%|███████████▌                         | 2491/8000 [38:27<1:21:42,  1.12it/s]

Episode 2491/8000, real env return = -109.70


 31%|███████████▌                         | 2501/8000 [38:36<1:17:46,  1.18it/s]

Episode 2501/8000, real env return = -107.62


 31%|███████████▌                         | 2511/8000 [38:44<1:16:08,  1.20it/s]

Episode 2511/8000, real env return = -107.15


 32%|███████████▋                         | 2521/8000 [38:53<1:17:04,  1.18it/s]

Episode 2521/8000, real env return = -107.65


 32%|███████████▋                         | 2531/8000 [39:01<1:16:21,  1.19it/s]

Episode 2531/8000, real env return = -107.59


 32%|███████████▊                         | 2541/8000 [39:09<1:16:39,  1.19it/s]

Episode 2541/8000, real env return = -104.97


 32%|███████████▊                         | 2551/8000 [39:18<1:17:03,  1.18it/s]

Episode 2551/8000, real env return = -103.55


 32%|███████████▊                         | 2561/8000 [39:26<1:15:02,  1.21it/s]

Episode 2561/8000, real env return = -103.33


 32%|███████████▉                         | 2571/8000 [39:35<1:15:58,  1.19it/s]

Episode 2571/8000, real env return = -106.59


 32%|███████████▉                         | 2581/8000 [39:43<1:17:57,  1.16it/s]

Episode 2581/8000, real env return = -107.01


 32%|███████████▉                         | 2591/8000 [39:52<1:16:07,  1.18it/s]

Episode 2591/8000, real env return = -106.54


 33%|████████████                         | 2601/8000 [40:00<1:18:58,  1.14it/s]

Episode 2601/8000, real env return = -104.53


 33%|████████████                         | 2611/8000 [40:09<1:14:56,  1.20it/s]

Episode 2611/8000, real env return = -104.20


 33%|████████████                         | 2621/8000 [40:18<1:20:32,  1.11it/s]

Episode 2621/8000, real env return = -110.33


 33%|████████████▏                        | 2631/8000 [40:26<1:19:18,  1.13it/s]

Episode 2631/8000, real env return = -109.13


 33%|████████████▏                        | 2641/8000 [40:35<1:16:58,  1.16it/s]

Episode 2641/8000, real env return = -104.15


 33%|████████████▎                        | 2651/8000 [40:43<1:15:36,  1.18it/s]

Episode 2651/8000, real env return = -103.62


 33%|████████████▎                        | 2661/8000 [40:51<1:16:53,  1.16it/s]

Episode 2661/8000, real env return = -103.12


 33%|████████████▎                        | 2671/8000 [41:00<1:17:47,  1.14it/s]

Episode 2671/8000, real env return = -103.41


 34%|████████████▍                        | 2681/8000 [41:08<1:14:24,  1.19it/s]

Episode 2681/8000, real env return = -101.88


 34%|████████████▍                        | 2691/8000 [41:17<1:12:13,  1.22it/s]

Episode 2691/8000, real env return = -104.03


 34%|████████████▍                        | 2701/8000 [41:25<1:13:05,  1.21it/s]

Episode 2701/8000, real env return = -103.19


 34%|████████████▌                        | 2711/8000 [41:34<1:15:09,  1.17it/s]

Episode 2711/8000, real env return = -102.38


 34%|████████████▌                        | 2721/8000 [41:42<1:16:20,  1.15it/s]

Episode 2721/8000, real env return = -101.50


 34%|████████████▋                        | 2731/8000 [41:51<1:16:35,  1.15it/s]

Episode 2731/8000, real env return = -103.38


 34%|████████████▋                        | 2741/8000 [41:59<1:14:16,  1.18it/s]

Episode 2741/8000, real env return = -103.17


 34%|████████████▋                        | 2751/8000 [42:08<1:15:00,  1.17it/s]

Episode 2751/8000, real env return = -110.53


 35%|████████████▊                        | 2761/8000 [42:16<1:12:14,  1.21it/s]

Episode 2761/8000, real env return = -109.71


 35%|████████████▊                        | 2771/8000 [42:24<1:11:46,  1.21it/s]

Episode 2771/8000, real env return = -109.76


 35%|████████████▊                        | 2781/8000 [42:33<1:14:11,  1.17it/s]

Episode 2781/8000, real env return = -107.98


 35%|████████████▉                        | 2791/8000 [42:42<1:15:49,  1.14it/s]

Episode 2791/8000, real env return = -109.26


 35%|████████████▉                        | 2801/8000 [42:50<1:11:21,  1.21it/s]

Episode 2801/8000, real env return = -108.33


 35%|█████████████                        | 2811/8000 [42:59<1:14:20,  1.16it/s]

Episode 2811/8000, real env return = -108.78


 35%|█████████████                        | 2821/8000 [43:07<1:12:31,  1.19it/s]

Episode 2821/8000, real env return = -108.95


 35%|█████████████                        | 2831/8000 [43:16<1:14:06,  1.16it/s]

Episode 2831/8000, real env return = -102.79


 36%|█████████████▏                       | 2841/8000 [43:24<1:14:04,  1.16it/s]

Episode 2841/8000, real env return = -103.35


 36%|█████████████▏                       | 2851/8000 [43:32<1:12:10,  1.19it/s]

Episode 2851/8000, real env return = -102.62


 36%|█████████████▏                       | 2861/8000 [43:41<1:13:07,  1.17it/s]

Episode 2861/8000, real env return = -102.18


 36%|█████████████▎                       | 2871/8000 [43:50<1:13:52,  1.16it/s]

Episode 2871/8000, real env return = -102.37


 36%|█████████████▎                       | 2881/8000 [43:58<1:14:07,  1.15it/s]

Episode 2881/8000, real env return = -102.77


 36%|█████████████▎                       | 2891/8000 [44:07<1:14:38,  1.14it/s]

Episode 2891/8000, real env return = -103.06


 36%|█████████████▍                       | 2901/8000 [44:16<1:12:55,  1.17it/s]

Episode 2901/8000, real env return = -103.36


 36%|█████████████▍                       | 2911/8000 [44:24<1:14:38,  1.14it/s]

Episode 2911/8000, real env return = -110.58


 37%|█████████████▌                       | 2921/8000 [44:33<1:14:17,  1.14it/s]

Episode 2921/8000, real env return = -107.26


 37%|█████████████▌                       | 2931/8000 [44:42<1:12:23,  1.17it/s]

Episode 2931/8000, real env return = -106.61


 37%|█████████████▌                       | 2941/8000 [44:50<1:14:12,  1.14it/s]

Episode 2941/8000, real env return = -108.69


 37%|█████████████▋                       | 2951/8000 [44:59<1:13:48,  1.14it/s]

Episode 2951/8000, real env return = -108.64


 37%|█████████████▋                       | 2961/8000 [45:07<1:09:59,  1.20it/s]

Episode 2961/8000, real env return = -105.69


 37%|█████████████▋                       | 2971/8000 [45:16<1:13:40,  1.14it/s]

Episode 2971/8000, real env return = -101.63


 37%|█████████████▊                       | 2981/8000 [45:25<1:13:21,  1.14it/s]

Episode 2981/8000, real env return = -103.55


 37%|█████████████▊                       | 2991/8000 [45:33<1:10:29,  1.18it/s]

Episode 2991/8000, real env return = -103.00


 38%|█████████████▉                       | 3001/8000 [45:42<1:09:03,  1.21it/s]

Episode 3001/8000, real env return = -109.07


 38%|█████████████▉                       | 3011/8000 [45:50<1:12:22,  1.15it/s]

Episode 3011/8000, real env return = -109.49


 38%|█████████████▉                       | 3021/8000 [45:59<1:12:46,  1.14it/s]

Episode 3021/8000, real env return = -109.02


 38%|██████████████                       | 3031/8000 [46:08<1:10:33,  1.17it/s]

Episode 3031/8000, real env return = -110.18


 38%|██████████████                       | 3041/8000 [46:16<1:12:33,  1.14it/s]

Episode 3041/8000, real env return = -109.22


 38%|██████████████                       | 3051/8000 [46:25<1:08:49,  1.20it/s]

Episode 3051/8000, real env return = -109.09


 38%|██████████████▏                      | 3061/8000 [46:33<1:11:11,  1.16it/s]

Episode 3061/8000, real env return = -111.38


 38%|██████████████▏                      | 3071/8000 [46:42<1:11:11,  1.15it/s]

Episode 3071/8000, real env return = -105.29


 39%|██████████████▏                      | 3081/8000 [46:51<1:12:03,  1.14it/s]

Episode 3081/8000, real env return = -103.32


 39%|██████████████▎                      | 3091/8000 [46:59<1:10:30,  1.16it/s]

Episode 3091/8000, real env return = -103.36


 39%|██████████████▎                      | 3101/8000 [47:08<1:07:52,  1.20it/s]

Episode 3101/8000, real env return = -103.20


 39%|██████████████▍                      | 3111/8000 [47:16<1:10:01,  1.16it/s]

Episode 3111/8000, real env return = -103.63


 39%|██████████████▍                      | 3121/8000 [47:25<1:10:00,  1.16it/s]

Episode 3121/8000, real env return = -109.17


 39%|██████████████▍                      | 3131/8000 [47:33<1:09:49,  1.16it/s]

Episode 3131/8000, real env return = -110.17


 39%|██████████████▌                      | 3141/8000 [47:42<1:07:21,  1.20it/s]

Episode 3141/8000, real env return = -108.39


 39%|██████████████▌                      | 3151/8000 [47:50<1:09:41,  1.16it/s]

Episode 3151/8000, real env return = -107.39


 40%|██████████████▌                      | 3161/8000 [47:59<1:09:46,  1.16it/s]

Episode 3161/8000, real env return = -107.29


 40%|██████████████▋                      | 3171/8000 [48:08<1:07:55,  1.18it/s]

Episode 3171/8000, real env return = -108.15


 40%|██████████████▋                      | 3181/8000 [48:16<1:08:48,  1.17it/s]

Episode 3181/8000, real env return = -107.59


 40%|██████████████▊                      | 3191/8000 [48:25<1:10:24,  1.14it/s]

Episode 3191/8000, real env return = -109.36


 40%|██████████████▊                      | 3201/8000 [48:33<1:08:05,  1.17it/s]

Episode 3201/8000, real env return = -110.65


 40%|██████████████▊                      | 3211/8000 [48:42<1:08:40,  1.16it/s]

Episode 3211/8000, real env return = -108.84


 40%|██████████████▉                      | 3221/8000 [48:50<1:10:43,  1.13it/s]

Episode 3221/8000, real env return = -104.07


 40%|██████████████▉                      | 3231/8000 [48:59<1:08:16,  1.16it/s]

Episode 3231/8000, real env return = -102.68


 41%|██████████████▉                      | 3241/8000 [49:08<1:08:12,  1.16it/s]

Episode 3241/8000, real env return = -104.18


 41%|███████████████                      | 3251/8000 [49:16<1:08:34,  1.15it/s]

Episode 3251/8000, real env return = -114.95


 41%|███████████████                      | 3261/8000 [49:25<1:09:25,  1.14it/s]

Episode 3261/8000, real env return = -109.26


 41%|███████████████▏                     | 3271/8000 [49:33<1:06:06,  1.19it/s]

Episode 3271/8000, real env return = -111.72


 41%|███████████████▏                     | 3281/8000 [49:42<1:06:01,  1.19it/s]

Episode 3281/8000, real env return = -110.37


 41%|███████████████▏                     | 3291/8000 [49:50<1:04:59,  1.21it/s]

Episode 3291/8000, real env return = -109.12


 41%|███████████████▎                     | 3301/8000 [49:59<1:08:47,  1.14it/s]

Episode 3301/8000, real env return = -103.17


 41%|███████████████▎                     | 3311/8000 [50:08<1:05:40,  1.19it/s]

Episode 3311/8000, real env return = -108.20


 42%|███████████████▎                     | 3321/8000 [50:16<1:07:23,  1.16it/s]

Episode 3321/8000, real env return = -102.36


 42%|███████████████▍                     | 3331/8000 [50:25<1:09:53,  1.11it/s]

Episode 3331/8000, real env return = -100.35


 42%|███████████████▍                     | 3341/8000 [50:34<1:09:08,  1.12it/s]

Episode 3341/8000, real env return = -102.61


 42%|███████████████▍                     | 3351/8000 [50:42<1:07:28,  1.15it/s]

Episode 3351/8000, real env return = -103.79


 42%|███████████████▌                     | 3361/8000 [50:51<1:07:46,  1.14it/s]

Episode 3361/8000, real env return = -108.22


 42%|███████████████▌                     | 3371/8000 [51:00<1:05:28,  1.18it/s]

Episode 3371/8000, real env return = -106.63


 42%|███████████████▋                     | 3381/8000 [51:08<1:08:42,  1.12it/s]

Episode 3381/8000, real env return = -101.89


 42%|███████████████▋                     | 3391/8000 [51:17<1:03:28,  1.21it/s]

Episode 3391/8000, real env return = -102.53


 43%|███████████████▋                     | 3401/8000 [51:25<1:04:50,  1.18it/s]

Episode 3401/8000, real env return = -107.63


 43%|███████████████▊                     | 3411/8000 [51:34<1:07:06,  1.14it/s]

Episode 3411/8000, real env return = -102.51


 43%|███████████████▊                     | 3421/8000 [51:43<1:06:48,  1.14it/s]

Episode 3421/8000, real env return = -106.66


 43%|███████████████▊                     | 3431/8000 [51:52<1:06:12,  1.15it/s]

Episode 3431/8000, real env return = -107.47


 43%|███████████████▉                     | 3441/8000 [52:00<1:06:24,  1.14it/s]

Episode 3441/8000, real env return = -107.46


 43%|███████████████▉                     | 3451/8000 [52:09<1:07:04,  1.13it/s]

Episode 3451/8000, real env return = -103.85


 43%|████████████████                     | 3461/8000 [52:18<1:06:09,  1.14it/s]

Episode 3461/8000, real env return = -103.43


 43%|████████████████                     | 3471/8000 [52:26<1:06:35,  1.13it/s]

Episode 3471/8000, real env return = -102.72


 44%|████████████████                     | 3481/8000 [52:35<1:06:08,  1.14it/s]

Episode 3481/8000, real env return = -103.21


 44%|████████████████▏                    | 3491/8000 [52:44<1:06:35,  1.13it/s]

Episode 3491/8000, real env return = -103.95


 44%|████████████████▏                    | 3501/8000 [52:53<1:05:03,  1.15it/s]

Episode 3501/8000, real env return = -102.78


 44%|████████████████▏                    | 3511/8000 [53:01<1:04:51,  1.15it/s]

Episode 3511/8000, real env return = -108.34


 44%|████████████████▎                    | 3521/8000 [53:10<1:05:56,  1.13it/s]

Episode 3521/8000, real env return = -108.53


 44%|████████████████▎                    | 3531/8000 [53:19<1:04:08,  1.16it/s]

Episode 3531/8000, real env return = -105.67


 44%|████████████████▍                    | 3541/8000 [53:27<1:03:48,  1.16it/s]

Episode 3541/8000, real env return = -108.56


 44%|████████████████▍                    | 3551/8000 [53:36<1:02:44,  1.18it/s]

Episode 3551/8000, real env return = -106.06


 45%|████████████████▍                    | 3561/8000 [53:44<1:04:11,  1.15it/s]

Episode 3561/8000, real env return = -107.25


 45%|████████████████▌                    | 3571/8000 [53:53<1:01:53,  1.19it/s]

Episode 3571/8000, real env return = -104.77


 45%|████████████████▌                    | 3581/8000 [54:01<1:02:28,  1.18it/s]

Episode 3581/8000, real env return = -104.98


 45%|████████████████▌                    | 3591/8000 [54:10<1:04:21,  1.14it/s]

Episode 3591/8000, real env return = -103.71


 45%|████████████████▋                    | 3601/8000 [54:19<1:09:45,  1.05it/s]

Episode 3601/8000, real env return = -103.45


 45%|████████████████▋                    | 3611/8000 [54:28<1:02:52,  1.16it/s]

Episode 3611/8000, real env return = -103.99


 45%|████████████████▋                    | 3621/8000 [54:36<1:01:17,  1.19it/s]

Episode 3621/8000, real env return = -104.13


 45%|████████████████▊                    | 3631/8000 [54:45<1:04:11,  1.13it/s]

Episode 3631/8000, real env return = -103.86


 46%|████████████████▊                    | 3641/8000 [54:54<1:04:16,  1.13it/s]

Episode 3641/8000, real env return = -104.56


 46%|████████████████▉                    | 3651/8000 [55:02<1:02:39,  1.16it/s]

Episode 3651/8000, real env return = -103.46


 46%|████████████████▉                    | 3661/8000 [55:11<1:03:23,  1.14it/s]

Episode 3661/8000, real env return = -103.52


 46%|████████████████▉                    | 3671/8000 [55:20<1:03:02,  1.14it/s]

Episode 3671/8000, real env return = -106.16


 46%|█████████████████                    | 3681/8000 [55:29<1:04:23,  1.12it/s]

Episode 3681/8000, real env return = -103.39


 46%|█████████████████                    | 3691/8000 [55:37<1:02:42,  1.15it/s]

Episode 3691/8000, real env return = -102.99


 46%|█████████████████                    | 3701/8000 [55:46<1:02:37,  1.14it/s]

Episode 3701/8000, real env return = -103.21


 46%|█████████████████▏                   | 3711/8000 [55:55<1:02:20,  1.15it/s]

Episode 3711/8000, real env return = -103.56


 47%|█████████████████▏                   | 3721/8000 [56:04<1:01:51,  1.15it/s]

Episode 3721/8000, real env return = -107.23


 47%|█████████████████▎                   | 3731/8000 [56:12<1:03:03,  1.13it/s]

Episode 3731/8000, real env return = -102.79


 47%|█████████████████▎                   | 3741/8000 [56:21<1:02:51,  1.13it/s]

Episode 3741/8000, real env return = -103.25


 47%|█████████████████▎                   | 3751/8000 [56:30<1:01:44,  1.15it/s]

Episode 3751/8000, real env return = -106.60


 47%|█████████████████▍                   | 3761/8000 [56:39<1:01:20,  1.15it/s]

Episode 3761/8000, real env return = -103.12


 47%|█████████████████▍                   | 3771/8000 [56:47<1:00:29,  1.17it/s]

Episode 3771/8000, real env return = -103.80


 47%|█████████████████▍                   | 3781/8000 [56:56<1:02:03,  1.13it/s]

Episode 3781/8000, real env return = -108.32


 47%|█████████████████▌                   | 3791/8000 [57:05<1:02:44,  1.12it/s]

Episode 3791/8000, real env return = -106.67


 48%|█████████████████▌                   | 3801/8000 [57:14<1:02:06,  1.13it/s]

Episode 3801/8000, real env return = -103.63


 48%|██████████████████▌                    | 3811/8000 [57:22<59:44,  1.17it/s]

Episode 3811/8000, real env return = -103.93


 48%|█████████████████▋                   | 3821/8000 [57:31<1:00:20,  1.15it/s]

Episode 3821/8000, real env return = -103.41


 48%|█████████████████▋                   | 3831/8000 [57:40<1:00:59,  1.14it/s]

Episode 3831/8000, real env return = -107.95


 48%|█████████████████▊                   | 3841/8000 [57:49<1:01:32,  1.13it/s]

Episode 3841/8000, real env return = -105.69


 48%|█████████████████▊                   | 3851/8000 [57:57<1:00:02,  1.15it/s]

Episode 3851/8000, real env return = -103.70


 48%|█████████████████▊                   | 3861/8000 [58:06<1:00:50,  1.13it/s]

Episode 3861/8000, real env return = -104.69


 48%|█████████████████▉                   | 3871/8000 [58:15<1:00:15,  1.14it/s]

Episode 3871/8000, real env return = -107.32


 49%|█████████████████▉                   | 3881/8000 [58:24<1:00:29,  1.13it/s]

Episode 3881/8000, real env return = -102.29


 49%|█████████████████▉                   | 3891/8000 [58:32<1:00:23,  1.13it/s]

Episode 3891/8000, real env return = -102.21


 49%|███████████████████                    | 3901/8000 [58:41<59:44,  1.14it/s]

Episode 3901/8000, real env return = -102.61


 49%|███████████████████                    | 3911/8000 [58:50<59:18,  1.15it/s]

Episode 3911/8000, real env return = -104.40


 49%|███████████████████                    | 3921/8000 [58:59<59:37,  1.14it/s]

Episode 3921/8000, real env return = -108.25


 49%|██████████████████▏                  | 3931/8000 [59:07<1:00:00,  1.13it/s]

Episode 3931/8000, real env return = -103.50


 49%|███████████████████▏                   | 3941/8000 [59:16<59:38,  1.13it/s]

Episode 3941/8000, real env return = -110.21


 49%|███████████████████▎                   | 3951/8000 [59:25<58:39,  1.15it/s]

Episode 3951/8000, real env return = -108.24


 50%|███████████████████▎                   | 3961/8000 [59:34<58:23,  1.15it/s]

Episode 3961/8000, real env return = -109.54


 50%|███████████████████▎                   | 3971/8000 [59:42<59:22,  1.13it/s]

Episode 3971/8000, real env return = -103.86


 50%|███████████████████▍                   | 3981/8000 [59:51<56:59,  1.18it/s]

Episode 3981/8000, real env return = -109.64


 50%|██████████████████▍                  | 3991/8000 [1:00:00<58:25,  1.14it/s]

Episode 3991/8000, real env return = -109.81


 50%|██████████████████▌                  | 4001/8000 [1:00:09<59:03,  1.13it/s]

Episode 4001/8000, real env return = -108.81


 50%|██████████████████▌                  | 4011/8000 [1:00:17<58:05,  1.14it/s]

Episode 4011/8000, real env return = -103.90


 50%|██████████████████▌                  | 4021/8000 [1:00:26<57:52,  1.15it/s]

Episode 4021/8000, real env return = -102.62


 50%|██████████████████▋                  | 4031/8000 [1:00:35<57:48,  1.14it/s]

Episode 4031/8000, real env return = -108.23


 51%|██████████████████▋                  | 4041/8000 [1:00:44<58:06,  1.14it/s]

Episode 4041/8000, real env return = -109.73


 51%|██████████████████▋                  | 4051/8000 [1:00:52<58:13,  1.13it/s]

Episode 4051/8000, real env return = -103.23


 51%|██████████████████▊                  | 4061/8000 [1:01:01<58:42,  1.12it/s]

Episode 4061/8000, real env return = -103.68


 51%|██████████████████▊                  | 4071/8000 [1:01:10<58:15,  1.12it/s]

Episode 4071/8000, real env return = -109.56


 51%|██████████████████▊                  | 4081/8000 [1:01:19<58:12,  1.12it/s]

Episode 4081/8000, real env return = -108.07


 51%|██████████████████▉                  | 4091/8000 [1:01:28<56:23,  1.16it/s]

Episode 4091/8000, real env return = -103.56


 51%|██████████████████▉                  | 4101/8000 [1:01:37<57:05,  1.14it/s]

Episode 4101/8000, real env return = -103.39


 51%|███████████████████                  | 4111/8000 [1:01:45<55:58,  1.16it/s]

Episode 4111/8000, real env return = -109.84


 52%|███████████████████                  | 4121/8000 [1:01:54<56:46,  1.14it/s]

Episode 4121/8000, real env return = -108.07


 52%|███████████████████                  | 4131/8000 [1:02:03<57:21,  1.12it/s]

Episode 4131/8000, real env return = -103.02


 52%|███████████████████▏                 | 4141/8000 [1:02:12<56:45,  1.13it/s]

Episode 4141/8000, real env return = -103.11


 52%|███████████████████▏                 | 4151/8000 [1:02:20<55:46,  1.15it/s]

Episode 4151/8000, real env return = -108.28


 52%|███████████████████▏                 | 4161/8000 [1:02:29<57:08,  1.12it/s]

Episode 4161/8000, real env return = -110.16


 52%|███████████████████▎                 | 4171/8000 [1:02:38<55:21,  1.15it/s]

Episode 4171/8000, real env return = -102.93


 52%|███████████████████▎                 | 4181/8000 [1:02:47<55:44,  1.14it/s]

Episode 4181/8000, real env return = -103.59


 52%|███████████████████▍                 | 4191/8000 [1:02:56<54:19,  1.17it/s]

Episode 4191/8000, real env return = -107.55


 53%|███████████████████▍                 | 4201/8000 [1:03:04<55:58,  1.13it/s]

Episode 4201/8000, real env return = -107.11


 53%|███████████████████▍                 | 4211/8000 [1:03:13<54:53,  1.15it/s]

Episode 4211/8000, real env return = -103.24


 53%|███████████████████▌                 | 4221/8000 [1:03:22<55:16,  1.14it/s]

Episode 4221/8000, real env return = -103.33


 53%|███████████████████▌                 | 4231/8000 [1:03:31<54:47,  1.15it/s]

Episode 4231/8000, real env return = -109.59


 53%|███████████████████▌                 | 4241/8000 [1:03:40<55:46,  1.12it/s]

Episode 4241/8000, real env return = -109.01


 53%|███████████████████▋                 | 4251/8000 [1:03:48<54:35,  1.14it/s]

Episode 4251/8000, real env return = -102.82


 53%|███████████████████▋                 | 4261/8000 [1:03:57<54:59,  1.13it/s]

Episode 4261/8000, real env return = -102.63


 53%|███████████████████▊                 | 4271/8000 [1:04:06<54:17,  1.14it/s]

Episode 4271/8000, real env return = -109.44


 54%|███████████████████▊                 | 4281/8000 [1:04:15<54:18,  1.14it/s]

Episode 4281/8000, real env return = -110.23


 54%|███████████████████▊                 | 4291/8000 [1:04:23<54:30,  1.13it/s]

Episode 4291/8000, real env return = -102.37


 54%|███████████████████▉                 | 4301/8000 [1:04:32<54:53,  1.12it/s]

Episode 4301/8000, real env return = -101.01


 54%|███████████████████▉                 | 4311/8000 [1:04:41<54:00,  1.14it/s]

Episode 4311/8000, real env return = -109.81


 54%|███████████████████▉                 | 4321/8000 [1:04:50<54:45,  1.12it/s]

Episode 4321/8000, real env return = -108.65


 54%|████████████████████                 | 4331/8000 [1:04:59<54:30,  1.12it/s]

Episode 4331/8000, real env return = -103.65


 54%|████████████████████                 | 4341/8000 [1:05:08<55:47,  1.09it/s]

Episode 4341/8000, real env return = -102.78


 54%|████████████████████                 | 4351/8000 [1:05:17<55:58,  1.09it/s]

Episode 4351/8000, real env return = -108.67


 55%|████████████████████▏                | 4361/8000 [1:05:26<53:20,  1.14it/s]

Episode 4361/8000, real env return = -108.76


 55%|████████████████████▏                | 4371/8000 [1:05:35<52:47,  1.15it/s]

Episode 4371/8000, real env return = -103.84


 55%|████████████████████▎                | 4381/8000 [1:05:44<53:21,  1.13it/s]

Episode 4381/8000, real env return = -103.81


 55%|████████████████████▎                | 4391/8000 [1:05:53<53:13,  1.13it/s]

Episode 4391/8000, real env return = -108.48


 55%|████████████████████▎                | 4401/8000 [1:06:01<52:11,  1.15it/s]

Episode 4401/8000, real env return = -109.28


 55%|████████████████████▍                | 4411/8000 [1:06:10<54:40,  1.09it/s]

Episode 4411/8000, real env return = -103.60


 55%|████████████████████▍                | 4421/8000 [1:06:19<51:04,  1.17it/s]

Episode 4421/8000, real env return = -103.41


 55%|████████████████████▍                | 4431/8000 [1:06:28<51:56,  1.15it/s]

Episode 4431/8000, real env return = -103.86


 56%|████████████████████▌                | 4441/8000 [1:06:36<50:55,  1.16it/s]

Episode 4441/8000, real env return = -108.67


 56%|████████████████████▌                | 4451/8000 [1:06:45<53:34,  1.10it/s]

Episode 4451/8000, real env return = -103.61


 56%|████████████████████▋                | 4461/8000 [1:06:54<51:59,  1.13it/s]

Episode 4461/8000, real env return = -103.49


 56%|████████████████████▋                | 4471/8000 [1:07:03<49:06,  1.20it/s]

Episode 4471/8000, real env return = -103.63


 56%|████████████████████▋                | 4481/8000 [1:07:11<51:34,  1.14it/s]

Episode 4481/8000, real env return = -108.62


 56%|████████████████████▊                | 4491/8000 [1:07:20<51:04,  1.15it/s]

Episode 4491/8000, real env return = -103.64


 56%|████████████████████▊                | 4501/8000 [1:07:29<49:56,  1.17it/s]

Episode 4501/8000, real env return = -103.44


 56%|████████████████████▊                | 4511/8000 [1:07:38<50:18,  1.16it/s]

Episode 4511/8000, real env return = -103.31


 57%|████████████████████▉                | 4521/8000 [1:07:46<50:06,  1.16it/s]

Episode 4521/8000, real env return = -108.83


 57%|████████████████████▉                | 4531/8000 [1:07:55<51:03,  1.13it/s]

Episode 4531/8000, real env return = -103.15


 57%|█████████████████████                | 4541/8000 [1:08:04<50:08,  1.15it/s]

Episode 4541/8000, real env return = -103.76


 57%|█████████████████████                | 4551/8000 [1:08:13<51:33,  1.11it/s]

Episode 4551/8000, real env return = -103.18


 57%|█████████████████████                | 4561/8000 [1:08:21<49:43,  1.15it/s]

Episode 4561/8000, real env return = -108.25


 57%|█████████████████████▏               | 4571/8000 [1:08:30<50:43,  1.13it/s]

Episode 4571/8000, real env return = -103.51


 57%|█████████████████████▏               | 4581/8000 [1:08:39<49:31,  1.15it/s]

Episode 4581/8000, real env return = -103.17


 57%|█████████████████████▏               | 4591/8000 [1:08:47<49:48,  1.14it/s]

Episode 4591/8000, real env return = -104.30


 58%|█████████████████████▎               | 4601/8000 [1:08:56<49:40,  1.14it/s]

Episode 4601/8000, real env return = -109.36


 58%|█████████████████████▎               | 4611/8000 [1:09:05<49:54,  1.13it/s]

Episode 4611/8000, real env return = -103.59


 58%|█████████████████████▎               | 4621/8000 [1:09:14<49:16,  1.14it/s]

Episode 4621/8000, real env return = -103.12


 58%|█████████████████████▍               | 4631/8000 [1:09:22<47:57,  1.17it/s]

Episode 4631/8000, real env return = -103.74


 58%|█████████████████████▍               | 4641/8000 [1:09:31<48:33,  1.15it/s]

Episode 4641/8000, real env return = -108.96


 58%|█████████████████████▌               | 4651/8000 [1:09:40<46:59,  1.19it/s]

Episode 4651/8000, real env return = -103.99


 58%|█████████████████████▌               | 4661/8000 [1:09:48<48:07,  1.16it/s]

Episode 4661/8000, real env return = -103.27


 58%|█████████████████████▌               | 4671/8000 [1:09:57<47:46,  1.16it/s]

Episode 4671/8000, real env return = -103.81


 59%|█████████████████████▋               | 4681/8000 [1:10:06<49:34,  1.12it/s]

Episode 4681/8000, real env return = -107.39


 59%|█████████████████████▋               | 4691/8000 [1:10:14<48:06,  1.15it/s]

Episode 4691/8000, real env return = -103.13


 59%|█████████████████████▋               | 4701/8000 [1:10:23<47:01,  1.17it/s]

Episode 4701/8000, real env return = -103.35


 59%|█████████████████████▊               | 4711/8000 [1:10:31<47:33,  1.15it/s]

Episode 4711/8000, real env return = -103.15


 59%|█████████████████████▊               | 4721/8000 [1:10:40<47:09,  1.16it/s]

Episode 4721/8000, real env return = -108.44


 59%|█████████████████████▉               | 4731/8000 [1:10:49<47:37,  1.14it/s]

Episode 4731/8000, real env return = -102.97


 59%|█████████████████████▉               | 4741/8000 [1:10:58<46:22,  1.17it/s]

Episode 4741/8000, real env return = -102.93


 59%|█████████████████████▉               | 4751/8000 [1:11:06<48:11,  1.12it/s]

Episode 4751/8000, real env return = -103.07


 60%|██████████████████████               | 4761/8000 [1:11:15<47:39,  1.13it/s]

Episode 4761/8000, real env return = -108.14


 60%|██████████████████████               | 4771/8000 [1:11:24<47:07,  1.14it/s]

Episode 4771/8000, real env return = -102.98


 60%|██████████████████████               | 4781/8000 [1:11:32<46:41,  1.15it/s]

Episode 4781/8000, real env return = -103.71


 60%|██████████████████████▏              | 4791/8000 [1:11:41<47:33,  1.12it/s]

Episode 4791/8000, real env return = -105.93


 60%|██████████████████████▏              | 4801/8000 [1:11:50<46:13,  1.15it/s]

Episode 4801/8000, real env return = -108.45


 60%|██████████████████████▎              | 4811/8000 [1:11:59<46:40,  1.14it/s]

Episode 4811/8000, real env return = -103.19


 60%|██████████████████████▎              | 4821/8000 [1:12:07<45:13,  1.17it/s]

Episode 4821/8000, real env return = -103.26


 60%|██████████████████████▎              | 4831/8000 [1:12:16<45:48,  1.15it/s]

Episode 4831/8000, real env return = -103.49


 61%|██████████████████████▍              | 4841/8000 [1:12:25<46:35,  1.13it/s]

Episode 4841/8000, real env return = -104.84


 61%|██████████████████████▍              | 4851/8000 [1:12:34<46:11,  1.14it/s]

Episode 4851/8000, real env return = -103.80


 61%|██████████████████████▍              | 4861/8000 [1:12:42<44:59,  1.16it/s]

Episode 4861/8000, real env return = -104.47


 61%|██████████████████████▌              | 4871/8000 [1:12:51<45:36,  1.14it/s]

Episode 4871/8000, real env return = -103.74


 61%|██████████████████████▌              | 4881/8000 [1:13:00<45:41,  1.14it/s]

Episode 4881/8000, real env return = -102.11


 61%|██████████████████████▌              | 4891/8000 [1:13:08<44:51,  1.16it/s]

Episode 4891/8000, real env return = -109.89


 61%|██████████████████████▋              | 4901/8000 [1:13:17<44:39,  1.16it/s]

Episode 4901/8000, real env return = -109.25


 61%|██████████████████████▋              | 4911/8000 [1:13:26<44:53,  1.15it/s]

Episode 4911/8000, real env return = -108.83


 62%|██████████████████████▊              | 4921/8000 [1:13:35<44:03,  1.16it/s]

Episode 4921/8000, real env return = -107.95


 62%|██████████████████████▊              | 4931/8000 [1:13:43<44:16,  1.16it/s]

Episode 4931/8000, real env return = -108.10


 62%|██████████████████████▊              | 4941/8000 [1:13:52<44:38,  1.14it/s]

Episode 4941/8000, real env return = -109.31


 62%|██████████████████████▉              | 4951/8000 [1:14:01<45:16,  1.12it/s]

Episode 4951/8000, real env return = -107.98


 62%|██████████████████████▉              | 4961/8000 [1:14:10<49:06,  1.03it/s]

Episode 4961/8000, real env return = -108.23


 62%|██████████████████████▉              | 4971/8000 [1:14:20<46:52,  1.08it/s]

Episode 4971/8000, real env return = -108.60


 62%|███████████████████████              | 4981/8000 [1:14:29<45:57,  1.09it/s]

Episode 4981/8000, real env return = -108.10


 62%|███████████████████████              | 4991/8000 [1:14:38<46:40,  1.07it/s]

Episode 4991/8000, real env return = -107.91


 63%|███████████████████████▏             | 5001/8000 [1:14:47<44:54,  1.11it/s]

Episode 5001/8000, real env return = -107.93


 63%|███████████████████████▏             | 5011/8000 [1:14:56<44:08,  1.13it/s]

Episode 5011/8000, real env return = -107.78


 63%|███████████████████████▏             | 5021/8000 [1:15:05<43:37,  1.14it/s]

Episode 5021/8000, real env return = -108.44


 63%|███████████████████████▎             | 5031/8000 [1:15:13<42:44,  1.16it/s]

Episode 5031/8000, real env return = -108.33


 63%|███████████████████████▎             | 5041/8000 [1:15:22<43:38,  1.13it/s]

Episode 5041/8000, real env return = -109.92


 63%|███████████████████████▎             | 5051/8000 [1:15:31<43:14,  1.14it/s]

Episode 5051/8000, real env return = -107.58


 63%|███████████████████████▍             | 5061/8000 [1:15:40<44:05,  1.11it/s]

Episode 5061/8000, real env return = -108.11


 63%|███████████████████████▍             | 5071/8000 [1:15:49<45:58,  1.06it/s]

Episode 5071/8000, real env return = -108.91


 64%|███████████████████████▍             | 5081/8000 [1:15:58<45:12,  1.08it/s]

Episode 5081/8000, real env return = -108.09


 64%|███████████████████████▌             | 5091/8000 [1:16:07<43:09,  1.12it/s]

Episode 5091/8000, real env return = -108.44


 64%|███████████████████████▌             | 5101/8000 [1:16:16<41:11,  1.17it/s]

Episode 5101/8000, real env return = -109.10


 64%|███████████████████████▋             | 5111/8000 [1:16:25<41:42,  1.15it/s]

Episode 5111/8000, real env return = -108.17


 64%|███████████████████████▋             | 5121/8000 [1:16:33<41:44,  1.15it/s]

Episode 5121/8000, real env return = -108.25


 64%|███████████████████████▋             | 5131/8000 [1:16:42<41:22,  1.16it/s]

Episode 5131/8000, real env return = -108.34


 64%|███████████████████████▊             | 5141/8000 [1:16:51<42:03,  1.13it/s]

Episode 5141/8000, real env return = -107.78


 64%|███████████████████████▊             | 5151/8000 [1:16:59<41:44,  1.14it/s]

Episode 5151/8000, real env return = -108.54


 65%|███████████████████████▊             | 5161/8000 [1:17:08<41:37,  1.14it/s]

Episode 5161/8000, real env return = -108.22


 65%|███████████████████████▉             | 5171/8000 [1:17:17<41:16,  1.14it/s]

Episode 5171/8000, real env return = -107.95


 65%|███████████████████████▉             | 5181/8000 [1:17:26<41:30,  1.13it/s]

Episode 5181/8000, real env return = -107.44


 65%|████████████████████████             | 5191/8000 [1:17:35<41:40,  1.12it/s]

Episode 5191/8000, real env return = -108.82


 65%|████████████████████████             | 5201/8000 [1:17:44<39:54,  1.17it/s]

Episode 5201/8000, real env return = -108.46


 65%|████████████████████████             | 5211/8000 [1:17:52<40:54,  1.14it/s]

Episode 5211/8000, real env return = -107.39


 65%|████████████████████████▏            | 5221/8000 [1:18:01<40:44,  1.14it/s]

Episode 5221/8000, real env return = -108.10


 65%|████████████████████████▏            | 5231/8000 [1:18:10<40:16,  1.15it/s]

Episode 5231/8000, real env return = -109.27


 66%|████████████████████████▏            | 5241/8000 [1:18:19<40:57,  1.12it/s]

Episode 5241/8000, real env return = -107.56


 66%|████████████████████████▎            | 5251/8000 [1:18:28<40:49,  1.12it/s]

Episode 5251/8000, real env return = -101.95


 66%|████████████████████████▎            | 5261/8000 [1:18:36<40:09,  1.14it/s]

Episode 5261/8000, real env return = -102.32


 66%|████████████████████████▍            | 5271/8000 [1:18:45<40:47,  1.11it/s]

Episode 5271/8000, real env return = -108.54


 66%|████████████████████████▍            | 5281/8000 [1:18:54<39:14,  1.15it/s]

Episode 5281/8000, real env return = -107.70


 66%|████████████████████████▍            | 5291/8000 [1:19:03<39:27,  1.14it/s]

Episode 5291/8000, real env return = -103.05


 66%|████████████████████████▌            | 5301/8000 [1:19:12<40:13,  1.12it/s]

Episode 5301/8000, real env return = -107.69


 66%|████████████████████████▌            | 5311/8000 [1:19:20<38:42,  1.16it/s]

Episode 5311/8000, real env return = -108.64


 67%|████████████████████████▌            | 5321/8000 [1:19:29<38:10,  1.17it/s]

Episode 5321/8000, real env return = -108.12


 67%|████████████████████████▋            | 5331/8000 [1:19:38<39:18,  1.13it/s]

Episode 5331/8000, real env return = -102.42


 67%|████████████████████████▋            | 5341/8000 [1:19:46<38:11,  1.16it/s]

Episode 5341/8000, real env return = -108.53


 67%|████████████████████████▋            | 5351/8000 [1:19:55<38:51,  1.14it/s]

Episode 5351/8000, real env return = -108.45


 67%|████████████████████████▊            | 5361/8000 [1:20:04<38:23,  1.15it/s]

Episode 5361/8000, real env return = -107.78


 67%|████████████████████████▊            | 5371/8000 [1:20:13<38:30,  1.14it/s]

Episode 5371/8000, real env return = -103.24


 67%|████████████████████████▉            | 5381/8000 [1:20:21<37:34,  1.16it/s]

Episode 5381/8000, real env return = -108.20


 67%|████████████████████████▉            | 5391/8000 [1:20:30<37:16,  1.17it/s]

Episode 5391/8000, real env return = -108.07


 68%|████████████████████████▉            | 5401/8000 [1:20:39<37:26,  1.16it/s]

Episode 5401/8000, real env return = -108.07


 68%|█████████████████████████            | 5411/8000 [1:20:47<37:56,  1.14it/s]

Episode 5411/8000, real env return = -105.29


 68%|█████████████████████████            | 5421/8000 [1:20:56<37:48,  1.14it/s]

Episode 5421/8000, real env return = -108.00


 68%|█████████████████████████            | 5431/8000 [1:21:05<37:59,  1.13it/s]

Episode 5431/8000, real env return = -108.90


 68%|█████████████████████████▏           | 5441/8000 [1:21:14<37:07,  1.15it/s]

Episode 5441/8000, real env return = -107.61


 68%|█████████████████████████▏           | 5451/8000 [1:21:22<37:03,  1.15it/s]

Episode 5451/8000, real env return = -104.68


 68%|█████████████████████████▎           | 5461/8000 [1:21:31<36:36,  1.16it/s]

Episode 5461/8000, real env return = -108.66


 68%|█████████████████████████▎           | 5471/8000 [1:21:40<37:13,  1.13it/s]

Episode 5471/8000, real env return = -108.55


 69%|█████████████████████████▎           | 5481/8000 [1:21:49<36:26,  1.15it/s]

Episode 5481/8000, real env return = -107.97


 69%|█████████████████████████▍           | 5491/8000 [1:21:57<37:02,  1.13it/s]

Episode 5491/8000, real env return = -106.33


 69%|█████████████████████████▍           | 5501/8000 [1:22:06<36:04,  1.15it/s]

Episode 5501/8000, real env return = -109.00


 69%|█████████████████████████▍           | 5511/8000 [1:22:15<36:18,  1.14it/s]

Episode 5511/8000, real env return = -107.96


 69%|█████████████████████████▌           | 5521/8000 [1:22:23<34:31,  1.20it/s]

Episode 5521/8000, real env return = -108.10


 69%|█████████████████████████▌           | 5531/8000 [1:22:32<34:39,  1.19it/s]

Episode 5531/8000, real env return = -103.42


 69%|█████████████████████████▋           | 5541/8000 [1:22:40<35:46,  1.15it/s]

Episode 5541/8000, real env return = -108.08


 69%|█████████████████████████▋           | 5551/8000 [1:22:49<35:32,  1.15it/s]

Episode 5551/8000, real env return = -107.42


 70%|█████████████████████████▋           | 5561/8000 [1:22:58<35:30,  1.14it/s]

Episode 5561/8000, real env return = -108.64


 70%|█████████████████████████▊           | 5571/8000 [1:23:06<35:19,  1.15it/s]

Episode 5571/8000, real env return = -103.04


 70%|█████████████████████████▊           | 5581/8000 [1:23:15<35:15,  1.14it/s]

Episode 5581/8000, real env return = -108.48


 70%|█████████████████████████▊           | 5591/8000 [1:23:24<35:02,  1.15it/s]

Episode 5591/8000, real env return = -108.84


 70%|█████████████████████████▉           | 5601/8000 [1:23:33<35:13,  1.13it/s]

Episode 5601/8000, real env return = -108.71


 70%|█████████████████████████▉           | 5611/8000 [1:23:41<35:14,  1.13it/s]

Episode 5611/8000, real env return = -102.74


 70%|█████████████████████████▉           | 5621/8000 [1:23:50<33:43,  1.18it/s]

Episode 5621/8000, real env return = -108.38


 70%|██████████████████████████           | 5631/8000 [1:23:59<35:22,  1.12it/s]

Episode 5631/8000, real env return = -108.61


 71%|██████████████████████████           | 5641/8000 [1:24:08<35:39,  1.10it/s]

Episode 5641/8000, real env return = -108.22


 71%|██████████████████████████▏          | 5651/8000 [1:24:17<34:36,  1.13it/s]

Episode 5651/8000, real env return = -102.00


 71%|██████████████████████████▏          | 5661/8000 [1:24:26<33:39,  1.16it/s]

Episode 5661/8000, real env return = -102.81


 71%|██████████████████████████▏          | 5671/8000 [1:24:34<33:09,  1.17it/s]

Episode 5671/8000, real env return = -108.25


 71%|██████████████████████████▎          | 5681/8000 [1:24:43<34:10,  1.13it/s]

Episode 5681/8000, real env return = -108.99


 71%|██████████████████████████▎          | 5691/8000 [1:24:52<32:26,  1.19it/s]

Episode 5691/8000, real env return = -103.37


 71%|██████████████████████████▎          | 5701/8000 [1:25:00<33:20,  1.15it/s]

Episode 5701/8000, real env return = -103.56


 71%|██████████████████████████▍          | 5711/8000 [1:25:09<32:51,  1.16it/s]

Episode 5711/8000, real env return = -109.22


 72%|██████████████████████████▍          | 5721/8000 [1:25:18<33:42,  1.13it/s]

Episode 5721/8000, real env return = -107.93


 72%|██████████████████████████▌          | 5731/8000 [1:25:27<32:57,  1.15it/s]

Episode 5731/8000, real env return = -103.47


 72%|██████████████████████████▌          | 5741/8000 [1:25:35<33:08,  1.14it/s]

Episode 5741/8000, real env return = -103.65


 72%|██████████████████████████▌          | 5751/8000 [1:25:44<32:51,  1.14it/s]

Episode 5751/8000, real env return = -107.62


 72%|██████████████████████████▋          | 5761/8000 [1:25:53<32:30,  1.15it/s]

Episode 5761/8000, real env return = -108.76


 72%|██████████████████████████▋          | 5771/8000 [1:26:02<32:29,  1.14it/s]

Episode 5771/8000, real env return = -103.23


 72%|██████████████████████████▋          | 5781/8000 [1:26:10<32:51,  1.13it/s]

Episode 5781/8000, real env return = -103.11


 72%|██████████████████████████▊          | 5791/8000 [1:26:19<32:21,  1.14it/s]

Episode 5791/8000, real env return = -108.14


 73%|██████████████████████████▊          | 5801/8000 [1:26:28<30:54,  1.19it/s]

Episode 5801/8000, real env return = -108.69


 73%|██████████████████████████▉          | 5811/8000 [1:26:36<31:38,  1.15it/s]

Episode 5811/8000, real env return = -103.32


 73%|██████████████████████████▉          | 5821/8000 [1:26:45<32:07,  1.13it/s]

Episode 5821/8000, real env return = -103.67


 73%|██████████████████████████▉          | 5831/8000 [1:26:54<31:35,  1.14it/s]

Episode 5831/8000, real env return = -109.03


 73%|███████████████████████████          | 5841/8000 [1:27:02<31:38,  1.14it/s]

Episode 5841/8000, real env return = -108.52


 73%|███████████████████████████          | 5851/8000 [1:27:11<31:38,  1.13it/s]

Episode 5851/8000, real env return = -103.66


 73%|███████████████████████████          | 5861/8000 [1:27:20<30:51,  1.16it/s]

Episode 5861/8000, real env return = -103.29


 73%|███████████████████████████▏         | 5871/8000 [1:27:29<30:40,  1.16it/s]

Episode 5871/8000, real env return = -108.60


 74%|███████████████████████████▏         | 5881/8000 [1:27:38<30:49,  1.15it/s]

Episode 5881/8000, real env return = -108.38


 74%|███████████████████████████▏         | 5891/8000 [1:27:46<30:52,  1.14it/s]

Episode 5891/8000, real env return = -103.32


 74%|███████████████████████████▎         | 5901/8000 [1:27:55<30:31,  1.15it/s]

Episode 5901/8000, real env return = -103.51


 74%|███████████████████████████▎         | 5911/8000 [1:28:04<30:36,  1.14it/s]

Episode 5911/8000, real env return = -108.62


 74%|███████████████████████████▍         | 5921/8000 [1:28:12<29:57,  1.16it/s]

Episode 5921/8000, real env return = -108.08


 74%|███████████████████████████▍         | 5931/8000 [1:28:21<29:46,  1.16it/s]

Episode 5931/8000, real env return = -103.24


 74%|███████████████████████████▍         | 5941/8000 [1:28:30<30:19,  1.13it/s]

Episode 5941/8000, real env return = -104.32


 74%|███████████████████████████▌         | 5951/8000 [1:28:39<30:15,  1.13it/s]

Episode 5951/8000, real env return = -108.56


 75%|███████████████████████████▌         | 5961/8000 [1:28:47<29:55,  1.14it/s]

Episode 5961/8000, real env return = -107.93


 75%|███████████████████████████▌         | 5971/8000 [1:28:56<29:55,  1.13it/s]

Episode 5971/8000, real env return = -102.94


 75%|███████████████████████████▋         | 5981/8000 [1:29:05<29:32,  1.14it/s]

Episode 5981/8000, real env return = -103.19


 75%|███████████████████████████▋         | 5991/8000 [1:29:14<29:25,  1.14it/s]

Episode 5991/8000, real env return = -108.70


 75%|███████████████████████████▊         | 6001/8000 [1:29:22<29:23,  1.13it/s]

Episode 6001/8000, real env return = -107.56


 75%|███████████████████████████▊         | 6011/8000 [1:29:31<28:35,  1.16it/s]

Episode 6011/8000, real env return = -103.49


 75%|███████████████████████████▊         | 6021/8000 [1:29:40<29:05,  1.13it/s]

Episode 6021/8000, real env return = -103.57


 75%|███████████████████████████▉         | 6031/8000 [1:29:49<28:47,  1.14it/s]

Episode 6031/8000, real env return = -107.20


 76%|███████████████████████████▉         | 6041/8000 [1:29:57<28:30,  1.15it/s]

Episode 6041/8000, real env return = -107.84


 76%|███████████████████████████▉         | 6051/8000 [1:30:06<27:59,  1.16it/s]

Episode 6051/8000, real env return = -105.48


 76%|████████████████████████████         | 6061/8000 [1:30:15<28:32,  1.13it/s]

Episode 6061/8000, real env return = -103.50


 76%|████████████████████████████         | 6071/8000 [1:30:24<28:31,  1.13it/s]

Episode 6071/8000, real env return = -103.96


 76%|████████████████████████████         | 6081/8000 [1:30:33<28:22,  1.13it/s]

Episode 6081/8000, real env return = -108.13


 76%|████████████████████████████▏        | 6091/8000 [1:30:42<27:15,  1.17it/s]

Episode 6091/8000, real env return = -104.27


 76%|████████████████████████████▏        | 6101/8000 [1:30:50<27:49,  1.14it/s]

Episode 6101/8000, real env return = -102.04


 76%|████████████████████████████▎        | 6111/8000 [1:30:59<26:16,  1.20it/s]

Episode 6111/8000, real env return = -103.30


 77%|████████████████████████████▎        | 6121/8000 [1:31:07<26:09,  1.20it/s]

Episode 6121/8000, real env return = -107.91


 77%|████████████████████████████▎        | 6131/8000 [1:31:16<26:45,  1.16it/s]

Episode 6131/8000, real env return = -103.72


 77%|████████████████████████████▍        | 6141/8000 [1:31:24<26:41,  1.16it/s]

Episode 6141/8000, real env return = -103.58


 77%|████████████████████████████▍        | 6151/8000 [1:31:32<26:48,  1.15it/s]

Episode 6151/8000, real env return = -104.18


 77%|████████████████████████████▍        | 6161/8000 [1:31:41<26:20,  1.16it/s]

Episode 6161/8000, real env return = -108.27


 77%|████████████████████████████▌        | 6171/8000 [1:31:49<25:27,  1.20it/s]

Episode 6171/8000, real env return = -103.73


 77%|████████████████████████████▌        | 6181/8000 [1:31:58<27:01,  1.12it/s]

Episode 6181/8000, real env return = -103.47


 77%|████████████████████████████▋        | 6191/8000 [1:32:07<25:58,  1.16it/s]

Episode 6191/8000, real env return = -103.43


 78%|████████████████████████████▋        | 6201/8000 [1:32:15<25:55,  1.16it/s]

Episode 6201/8000, real env return = -109.26


 78%|████████████████████████████▋        | 6211/8000 [1:32:24<24:54,  1.20it/s]

Episode 6211/8000, real env return = -103.31


 78%|████████████████████████████▊        | 6221/8000 [1:32:32<25:26,  1.17it/s]

Episode 6221/8000, real env return = -103.62


 78%|████████████████████████████▊        | 6231/8000 [1:32:41<25:12,  1.17it/s]

Episode 6231/8000, real env return = -103.75


 78%|████████████████████████████▊        | 6241/8000 [1:32:49<25:16,  1.16it/s]

Episode 6241/8000, real env return = -108.94


 78%|████████████████████████████▉        | 6251/8000 [1:32:58<24:22,  1.20it/s]

Episode 6251/8000, real env return = -103.88


 78%|████████████████████████████▉        | 6261/8000 [1:33:06<24:36,  1.18it/s]

Episode 6261/8000, real env return = -105.07


 78%|█████████████████████████████        | 6271/8000 [1:33:15<24:45,  1.16it/s]

Episode 6271/8000, real env return = -107.02


 79%|█████████████████████████████        | 6281/8000 [1:33:23<25:01,  1.14it/s]

Episode 6281/8000, real env return = -108.23


 79%|█████████████████████████████        | 6291/8000 [1:33:32<23:35,  1.21it/s]

Episode 6291/8000, real env return = -103.69


 79%|█████████████████████████████▏       | 6301/8000 [1:33:40<23:33,  1.20it/s]

Episode 6301/8000, real env return = -103.51


 79%|█████████████████████████████▏       | 6311/8000 [1:33:49<24:01,  1.17it/s]

Episode 6311/8000, real env return = -104.20


 79%|█████████████████████████████▏       | 6321/8000 [1:33:57<23:33,  1.19it/s]

Episode 6321/8000, real env return = -103.67


 79%|█████████████████████████████▎       | 6331/8000 [1:34:06<23:30,  1.18it/s]

Episode 6331/8000, real env return = -103.46


 79%|█████████████████████████████▎       | 6341/8000 [1:34:14<24:19,  1.14it/s]

Episode 6341/8000, real env return = -109.25


 79%|█████████████████████████████▎       | 6351/8000 [1:34:23<23:34,  1.17it/s]

Episode 6351/8000, real env return = -104.02


 80%|█████████████████████████████▍       | 6361/8000 [1:34:31<22:35,  1.21it/s]

Episode 6361/8000, real env return = -103.67


 80%|█████████████████████████████▍       | 6371/8000 [1:34:39<22:52,  1.19it/s]

Episode 6371/8000, real env return = -103.79


 80%|█████████████████████████████▌       | 6381/8000 [1:34:48<22:55,  1.18it/s]

Episode 6381/8000, real env return = -103.55


 80%|█████████████████████████████▌       | 6391/8000 [1:34:56<22:39,  1.18it/s]

Episode 6391/8000, real env return = -103.83


 80%|█████████████████████████████▌       | 6401/8000 [1:35:05<22:19,  1.19it/s]

Episode 6401/8000, real env return = -107.50


 80%|█████████████████████████████▋       | 6411/8000 [1:35:13<22:29,  1.18it/s]

Episode 6411/8000, real env return = -104.48


 80%|█████████████████████████████▋       | 6421/8000 [1:35:22<22:16,  1.18it/s]

Episode 6421/8000, real env return = -103.09


 80%|█████████████████████████████▋       | 6431/8000 [1:35:30<22:08,  1.18it/s]

Episode 6431/8000, real env return = -101.77


 81%|█████████████████████████████▊       | 6441/8000 [1:35:39<21:47,  1.19it/s]

Episode 6441/8000, real env return = -103.38


 81%|█████████████████████████████▊       | 6451/8000 [1:35:47<22:34,  1.14it/s]

Episode 6451/8000, real env return = -103.80


 81%|█████████████████████████████▉       | 6461/8000 [1:35:56<21:50,  1.17it/s]

Episode 6461/8000, real env return = -103.43


 81%|█████████████████████████████▉       | 6471/8000 [1:36:04<20:49,  1.22it/s]

Episode 6471/8000, real env return = -102.84


 81%|█████████████████████████████▉       | 6481/8000 [1:36:12<21:13,  1.19it/s]

Episode 6481/8000, real env return = -102.95


 81%|██████████████████████████████       | 6491/8000 [1:36:21<23:39,  1.06it/s]

Episode 6491/8000, real env return = -103.35


 81%|██████████████████████████████       | 6501/8000 [1:36:30<23:16,  1.07it/s]

Episode 6501/8000, real env return = -103.49


 81%|██████████████████████████████       | 6511/8000 [1:36:39<23:01,  1.08it/s]

Episode 6511/8000, real env return = -103.36


 82%|██████████████████████████████▏      | 6521/8000 [1:36:49<22:43,  1.08it/s]

Episode 6521/8000, real env return = -103.44


 82%|██████████████████████████████▏      | 6531/8000 [1:36:58<23:21,  1.05it/s]

Episode 6531/8000, real env return = -107.91


 82%|██████████████████████████████▎      | 6541/8000 [1:37:08<22:30,  1.08it/s]

Episode 6541/8000, real env return = -106.74


 82%|██████████████████████████████▎      | 6551/8000 [1:37:17<23:19,  1.04it/s]

Episode 6551/8000, real env return = -107.43


 82%|██████████████████████████████▎      | 6561/8000 [1:37:26<21:13,  1.13it/s]

Episode 6561/8000, real env return = -107.40


 82%|██████████████████████████████▍      | 6571/8000 [1:37:35<21:50,  1.09it/s]

Episode 6571/8000, real env return = -107.60


 82%|██████████████████████████████▍      | 6581/8000 [1:37:44<22:18,  1.06it/s]

Episode 6581/8000, real env return = -107.48


 82%|██████████████████████████████▍      | 6591/8000 [1:37:53<21:19,  1.10it/s]

Episode 6591/8000, real env return = -107.88


 83%|██████████████████████████████▌      | 6601/8000 [1:38:02<21:02,  1.11it/s]

Episode 6601/8000, real env return = -108.63


 83%|██████████████████████████████▌      | 6611/8000 [1:38:12<21:00,  1.10it/s]

Episode 6611/8000, real env return = -108.17


 83%|██████████████████████████████▌      | 6621/8000 [1:38:21<20:54,  1.10it/s]

Episode 6621/8000, real env return = -107.97


 83%|██████████████████████████████▋      | 6631/8000 [1:38:30<21:06,  1.08it/s]

Episode 6631/8000, real env return = -107.01


 83%|██████████████████████████████▋      | 6641/8000 [1:38:38<20:05,  1.13it/s]

Episode 6641/8000, real env return = -100.92


 83%|██████████████████████████████▊      | 6651/8000 [1:38:47<19:04,  1.18it/s]

Episode 6651/8000, real env return = -104.13


 83%|██████████████████████████████▊      | 6661/8000 [1:38:56<18:53,  1.18it/s]

Episode 6661/8000, real env return = -106.72


 83%|██████████████████████████████▊      | 6671/8000 [1:39:05<19:05,  1.16it/s]

Episode 6671/8000, real env return = -107.60


 84%|██████████████████████████████▉      | 6681/8000 [1:39:14<19:42,  1.12it/s]

Episode 6681/8000, real env return = -104.82


 84%|██████████████████████████████▉      | 6691/8000 [1:39:22<18:41,  1.17it/s]

Episode 6691/8000, real env return = -100.88


 84%|██████████████████████████████▉      | 6701/8000 [1:39:31<19:23,  1.12it/s]

Episode 6701/8000, real env return = -106.09


 84%|███████████████████████████████      | 6711/8000 [1:39:40<19:45,  1.09it/s]

Episode 6711/8000, real env return = -106.15


 84%|███████████████████████████████      | 6721/8000 [1:39:49<18:09,  1.17it/s]

Episode 6721/8000, real env return = -107.43


 84%|███████████████████████████████▏     | 6731/8000 [1:39:58<19:03,  1.11it/s]

Episode 6731/8000, real env return = -108.17


 84%|███████████████████████████████▏     | 6741/8000 [1:40:07<18:13,  1.15it/s]

Episode 6741/8000, real env return = -107.73


 84%|███████████████████████████████▏     | 6751/8000 [1:40:16<18:16,  1.14it/s]

Episode 6751/8000, real env return = -106.33


 85%|███████████████████████████████▎     | 6761/8000 [1:40:25<18:28,  1.12it/s]

Episode 6761/8000, real env return = -106.26


 85%|███████████████████████████████▎     | 6771/8000 [1:40:33<17:26,  1.17it/s]

Episode 6771/8000, real env return = -105.89


 85%|███████████████████████████████▎     | 6781/8000 [1:40:42<17:38,  1.15it/s]

Episode 6781/8000, real env return = -107.00


 85%|███████████████████████████████▍     | 6791/8000 [1:40:51<17:39,  1.14it/s]

Episode 6791/8000, real env return = -105.74


 85%|███████████████████████████████▍     | 6801/8000 [1:40:59<16:58,  1.18it/s]

Episode 6801/8000, real env return = -104.97


 85%|███████████████████████████████▌     | 6811/8000 [1:41:08<17:00,  1.17it/s]

Episode 6811/8000, real env return = -106.33


 85%|███████████████████████████████▌     | 6821/8000 [1:41:16<16:49,  1.17it/s]

Episode 6821/8000, real env return = -106.53


 85%|███████████████████████████████▌     | 6831/8000 [1:41:25<16:22,  1.19it/s]

Episode 6831/8000, real env return = -106.39


 86%|███████████████████████████████▋     | 6841/8000 [1:41:34<16:19,  1.18it/s]

Episode 6841/8000, real env return = -106.83


 86%|███████████████████████████████▋     | 6851/8000 [1:41:42<16:17,  1.18it/s]

Episode 6851/8000, real env return = -106.39


 86%|███████████████████████████████▋     | 6861/8000 [1:41:50<16:16,  1.17it/s]

Episode 6861/8000, real env return = -106.90


 86%|███████████████████████████████▊     | 6871/8000 [1:41:59<16:48,  1.12it/s]

Episode 6871/8000, real env return = -106.34


 86%|███████████████████████████████▊     | 6881/8000 [1:42:08<15:45,  1.18it/s]

Episode 6881/8000, real env return = -106.71


 86%|███████████████████████████████▊     | 6891/8000 [1:42:16<15:30,  1.19it/s]

Episode 6891/8000, real env return = -108.01


 86%|███████████████████████████████▉     | 6901/8000 [1:42:25<15:33,  1.18it/s]

Episode 6901/8000, real env return = -106.35


 86%|███████████████████████████████▉     | 6911/8000 [1:42:34<16:08,  1.12it/s]

Episode 6911/8000, real env return = -106.31


 87%|████████████████████████████████     | 6921/8000 [1:42:42<15:19,  1.17it/s]

Episode 6921/8000, real env return = -106.24


 87%|████████████████████████████████     | 6931/8000 [1:42:51<16:00,  1.11it/s]

Episode 6931/8000, real env return = -108.16


 87%|████████████████████████████████     | 6941/8000 [1:43:00<14:48,  1.19it/s]

Episode 6941/8000, real env return = -106.46


 87%|████████████████████████████████▏    | 6951/8000 [1:43:08<14:55,  1.17it/s]

Episode 6951/8000, real env return = -105.97


 87%|████████████████████████████████▏    | 6961/8000 [1:43:17<14:24,  1.20it/s]

Episode 6961/8000, real env return = -106.17


 87%|████████████████████████████████▏    | 6971/8000 [1:43:25<14:33,  1.18it/s]

Episode 6971/8000, real env return = -107.12


 87%|████████████████████████████████▎    | 6981/8000 [1:43:34<14:09,  1.20it/s]

Episode 6981/8000, real env return = -106.36


 87%|████████████████████████████████▎    | 6991/8000 [1:43:42<14:31,  1.16it/s]

Episode 6991/8000, real env return = -106.28


 88%|████████████████████████████████▍    | 7001/8000 [1:43:51<14:30,  1.15it/s]

Episode 7001/8000, real env return = -106.24


 88%|████████████████████████████████▍    | 7011/8000 [1:43:59<14:07,  1.17it/s]

Episode 7011/8000, real env return = -106.54


 88%|████████████████████████████████▍    | 7021/8000 [1:44:08<13:48,  1.18it/s]

Episode 7021/8000, real env return = -106.36


 88%|████████████████████████████████▌    | 7031/8000 [1:44:16<13:32,  1.19it/s]

Episode 7031/8000, real env return = -106.12


 88%|████████████████████████████████▌    | 7041/8000 [1:44:25<13:42,  1.17it/s]

Episode 7041/8000, real env return = -106.21


 88%|████████████████████████████████▌    | 7051/8000 [1:44:33<13:24,  1.18it/s]

Episode 7051/8000, real env return = -106.68


 88%|████████████████████████████████▋    | 7061/8000 [1:44:41<13:09,  1.19it/s]

Episode 7061/8000, real env return = -106.50


 88%|████████████████████████████████▋    | 7071/8000 [1:44:50<13:27,  1.15it/s]

Episode 7071/8000, real env return = -105.94


 89%|████████████████████████████████▋    | 7081/8000 [1:44:58<12:38,  1.21it/s]

Episode 7081/8000, real env return = -106.33


 89%|████████████████████████████████▊    | 7091/8000 [1:45:07<13:01,  1.16it/s]

Episode 7091/8000, real env return = -106.84


 89%|████████████████████████████████▊    | 7101/8000 [1:45:15<12:25,  1.21it/s]

Episode 7101/8000, real env return = -106.52


 89%|████████████████████████████████▉    | 7111/8000 [1:45:24<12:22,  1.20it/s]

Episode 7111/8000, real env return = -105.90


 89%|████████████████████████████████▉    | 7121/8000 [1:45:32<12:08,  1.21it/s]

Episode 7121/8000, real env return = -106.11


 89%|████████████████████████████████▉    | 7131/8000 [1:45:41<12:25,  1.17it/s]

Episode 7131/8000, real env return = -106.34


 89%|█████████████████████████████████    | 7141/8000 [1:45:49<11:56,  1.20it/s]

Episode 7141/8000, real env return = -105.93


 89%|█████████████████████████████████    | 7151/8000 [1:45:58<12:28,  1.13it/s]

Episode 7151/8000, real env return = -106.62


 90%|█████████████████████████████████    | 7161/8000 [1:46:06<12:01,  1.16it/s]

Episode 7161/8000, real env return = -106.22


 90%|█████████████████████████████████▏   | 7171/8000 [1:46:15<11:51,  1.16it/s]

Episode 7171/8000, real env return = -106.69


 90%|█████████████████████████████████▏   | 7181/8000 [1:46:23<11:57,  1.14it/s]

Episode 7181/8000, real env return = -106.38


 90%|█████████████████████████████████▎   | 7191/8000 [1:46:31<11:03,  1.22it/s]

Episode 7191/8000, real env return = -106.20


 90%|█████████████████████████████████▎   | 7201/8000 [1:46:40<11:11,  1.19it/s]

Episode 7201/8000, real env return = -104.84


 90%|█████████████████████████████████▎   | 7211/8000 [1:46:48<10:48,  1.22it/s]

Episode 7211/8000, real env return = -106.01


 90%|█████████████████████████████████▍   | 7221/8000 [1:46:57<10:42,  1.21it/s]

Episode 7221/8000, real env return = -106.05


 90%|█████████████████████████████████▍   | 7231/8000 [1:47:05<11:00,  1.16it/s]

Episode 7231/8000, real env return = -106.13


 91%|█████████████████████████████████▍   | 7241/8000 [1:47:14<10:31,  1.20it/s]

Episode 7241/8000, real env return = -106.16


 91%|█████████████████████████████████▌   | 7251/8000 [1:47:22<10:39,  1.17it/s]

Episode 7251/8000, real env return = -103.95


 91%|█████████████████████████████████▌   | 7261/8000 [1:47:31<10:27,  1.18it/s]

Episode 7261/8000, real env return = -106.35


 91%|█████████████████████████████████▋   | 7271/8000 [1:47:39<10:10,  1.19it/s]

Episode 7271/8000, real env return = -105.83


 91%|█████████████████████████████████▋   | 7281/8000 [1:47:48<10:16,  1.17it/s]

Episode 7281/8000, real env return = -105.95


 91%|█████████████████████████████████▋   | 7291/8000 [1:47:56<10:14,  1.15it/s]

Episode 7291/8000, real env return = -106.82


 91%|█████████████████████████████████▊   | 7301/8000 [1:48:05<10:10,  1.15it/s]

Episode 7301/8000, real env return = -106.43


 91%|█████████████████████████████████▊   | 7311/8000 [1:48:13<09:33,  1.20it/s]

Episode 7311/8000, real env return = -106.33


 92%|█████████████████████████████████▊   | 7321/8000 [1:48:22<09:29,  1.19it/s]

Episode 7321/8000, real env return = -106.22


 92%|█████████████████████████████████▉   | 7331/8000 [1:48:30<09:13,  1.21it/s]

Episode 7331/8000, real env return = -106.50


 92%|█████████████████████████████████▉   | 7341/8000 [1:48:39<09:41,  1.13it/s]

Episode 7341/8000, real env return = -105.89


 92%|█████████████████████████████████▉   | 7351/8000 [1:48:47<09:17,  1.17it/s]

Episode 7351/8000, real env return = -106.18


 92%|██████████████████████████████████   | 7361/8000 [1:48:56<08:46,  1.21it/s]

Episode 7361/8000, real env return = -106.76


 92%|██████████████████████████████████   | 7371/8000 [1:49:04<08:43,  1.20it/s]

Episode 7371/8000, real env return = -107.28


 92%|██████████████████████████████████▏  | 7381/8000 [1:49:12<08:37,  1.20it/s]

Episode 7381/8000, real env return = -106.48


 92%|██████████████████████████████████▏  | 7391/8000 [1:49:21<08:30,  1.19it/s]

Episode 7391/8000, real env return = -106.43


 93%|██████████████████████████████████▏  | 7401/8000 [1:49:29<08:16,  1.21it/s]

Episode 7401/8000, real env return = -106.14


 93%|██████████████████████████████████▎  | 7411/8000 [1:49:38<08:08,  1.21it/s]

Episode 7411/8000, real env return = -106.85


 93%|██████████████████████████████████▎  | 7421/8000 [1:49:46<08:16,  1.17it/s]

Episode 7421/8000, real env return = -106.84


 93%|██████████████████████████████████▎  | 7431/8000 [1:49:55<07:59,  1.19it/s]

Episode 7431/8000, real env return = -105.71


 93%|██████████████████████████████████▍  | 7441/8000 [1:50:03<07:49,  1.19it/s]

Episode 7441/8000, real env return = -105.97


 93%|██████████████████████████████████▍  | 7451/8000 [1:50:12<07:37,  1.20it/s]

Episode 7451/8000, real env return = -107.40


 93%|██████████████████████████████████▌  | 7461/8000 [1:50:20<07:31,  1.19it/s]

Episode 7461/8000, real env return = -105.89


 93%|██████████████████████████████████▌  | 7471/8000 [1:50:29<07:18,  1.21it/s]

Episode 7471/8000, real env return = -106.24


 94%|██████████████████████████████████▌  | 7481/8000 [1:50:37<07:16,  1.19it/s]

Episode 7481/8000, real env return = -106.24


 94%|██████████████████████████████████▋  | 7491/8000 [1:50:46<07:20,  1.16it/s]

Episode 7491/8000, real env return = -106.34


 94%|██████████████████████████████████▋  | 7501/8000 [1:50:54<06:56,  1.20it/s]

Episode 7501/8000, real env return = -107.18


 94%|██████████████████████████████████▋  | 7511/8000 [1:51:02<06:54,  1.18it/s]

Episode 7511/8000, real env return = -106.51


 94%|██████████████████████████████████▊  | 7521/8000 [1:51:11<06:55,  1.15it/s]

Episode 7521/8000, real env return = -106.00


 94%|██████████████████████████████████▊  | 7531/8000 [1:51:20<07:11,  1.09it/s]

Episode 7531/8000, real env return = -106.80


 94%|██████████████████████████████████▉  | 7541/8000 [1:51:28<06:28,  1.18it/s]

Episode 7541/8000, real env return = -105.99


 94%|██████████████████████████████████▉  | 7551/8000 [1:51:37<06:36,  1.13it/s]

Episode 7551/8000, real env return = -106.07


 95%|██████████████████████████████████▉  | 7561/8000 [1:51:46<06:12,  1.18it/s]

Episode 7561/8000, real env return = -106.08


 95%|███████████████████████████████████  | 7571/8000 [1:51:55<06:26,  1.11it/s]

Episode 7571/8000, real env return = -106.17


 95%|███████████████████████████████████  | 7581/8000 [1:52:03<06:19,  1.10it/s]

Episode 7581/8000, real env return = -106.72


 95%|███████████████████████████████████  | 7591/8000 [1:52:12<06:04,  1.12it/s]

Episode 7591/8000, real env return = -107.05


 95%|███████████████████████████████████▏ | 7601/8000 [1:52:21<05:44,  1.16it/s]

Episode 7601/8000, real env return = -105.42


 95%|███████████████████████████████████▏ | 7611/8000 [1:52:30<05:35,  1.16it/s]

Episode 7611/8000, real env return = -106.26


 95%|███████████████████████████████████▏ | 7621/8000 [1:52:39<05:26,  1.16it/s]

Episode 7621/8000, real env return = -106.05


 95%|███████████████████████████████████▎ | 7631/8000 [1:52:48<05:27,  1.13it/s]

Episode 7631/8000, real env return = -106.43


 96%|███████████████████████████████████▎ | 7641/8000 [1:52:57<05:19,  1.12it/s]

Episode 7641/8000, real env return = -106.13


 96%|███████████████████████████████████▍ | 7651/8000 [1:53:05<04:57,  1.17it/s]

Episode 7651/8000, real env return = -107.08


 96%|███████████████████████████████████▍ | 7661/8000 [1:53:14<04:50,  1.17it/s]

Episode 7661/8000, real env return = -105.90


 96%|███████████████████████████████████▍ | 7671/8000 [1:53:23<05:03,  1.08it/s]

Episode 7671/8000, real env return = -105.02


 96%|███████████████████████████████████▌ | 7681/8000 [1:53:32<04:47,  1.11it/s]

Episode 7681/8000, real env return = -105.92


 96%|███████████████████████████████████▌ | 7691/8000 [1:53:41<04:27,  1.16it/s]

Episode 7691/8000, real env return = -105.96


 96%|███████████████████████████████████▌ | 7701/8000 [1:53:49<04:22,  1.14it/s]

Episode 7701/8000, real env return = -103.91


 96%|███████████████████████████████████▋ | 7711/8000 [1:53:58<04:11,  1.15it/s]

Episode 7711/8000, real env return = -105.70


 97%|███████████████████████████████████▋ | 7721/8000 [1:54:07<04:05,  1.14it/s]

Episode 7721/8000, real env return = -106.41


 97%|███████████████████████████████████▊ | 7731/8000 [1:54:16<03:47,  1.18it/s]

Episode 7731/8000, real env return = -104.51


 97%|███████████████████████████████████▊ | 7741/8000 [1:54:24<03:38,  1.18it/s]

Episode 7741/8000, real env return = -104.23


 97%|███████████████████████████████████▊ | 7751/8000 [1:54:33<03:30,  1.18it/s]

Episode 7751/8000, real env return = -104.13


 97%|███████████████████████████████████▉ | 7761/8000 [1:54:42<03:26,  1.15it/s]

Episode 7761/8000, real env return = -106.11


 97%|███████████████████████████████████▉ | 7771/8000 [1:54:51<03:24,  1.12it/s]

Episode 7771/8000, real env return = -104.64


 97%|███████████████████████████████████▉ | 7781/8000 [1:55:00<03:20,  1.09it/s]

Episode 7781/8000, real env return = -104.05


 97%|████████████████████████████████████ | 7791/8000 [1:55:09<03:03,  1.14it/s]

Episode 7791/8000, real env return = -107.09


 98%|████████████████████████████████████ | 7801/8000 [1:55:18<02:55,  1.13it/s]

Episode 7801/8000, real env return = -107.62


 98%|████████████████████████████████████▏| 7811/8000 [1:55:27<02:38,  1.20it/s]

Episode 7811/8000, real env return = -106.82


 98%|████████████████████████████████████▏| 7821/8000 [1:55:35<02:33,  1.16it/s]

Episode 7821/8000, real env return = -106.48


 98%|████████████████████████████████████▏| 7831/8000 [1:55:44<02:31,  1.12it/s]

Episode 7831/8000, real env return = -106.48


 98%|████████████████████████████████████▎| 7841/8000 [1:55:53<02:33,  1.03it/s]

Episode 7841/8000, real env return = -105.77


 98%|████████████████████████████████████▎| 7851/8000 [1:56:02<02:12,  1.12it/s]

Episode 7851/8000, real env return = -106.06


 98%|████████████████████████████████████▎| 7861/8000 [1:56:11<02:04,  1.12it/s]

Episode 7861/8000, real env return = -105.84


 98%|████████████████████████████████████▍| 7871/8000 [1:56:20<01:50,  1.17it/s]

Episode 7871/8000, real env return = -106.15


 99%|████████████████████████████████████▍| 7881/8000 [1:56:29<01:45,  1.13it/s]

Episode 7881/8000, real env return = -105.56


 99%|████████████████████████████████████▍| 7891/8000 [1:56:38<01:35,  1.14it/s]

Episode 7891/8000, real env return = -105.34


 99%|████████████████████████████████████▌| 7901/8000 [1:56:46<01:27,  1.13it/s]

Episode 7901/8000, real env return = -101.38


 99%|████████████████████████████████████▌| 7911/8000 [1:56:55<01:17,  1.15it/s]

Episode 7911/8000, real env return = -98.09


 99%|████████████████████████████████████▋| 7921/8000 [1:57:04<01:10,  1.12it/s]

Episode 7921/8000, real env return = -98.95


 99%|████████████████████████████████████▋| 7931/8000 [1:57:14<01:07,  1.03it/s]

Episode 7931/8000, real env return = -99.08


 99%|████████████████████████████████████▋| 7941/8000 [1:57:23<00:54,  1.07it/s]

Episode 7941/8000, real env return = -98.82


 99%|████████████████████████████████████▊| 7951/8000 [1:57:32<00:46,  1.05it/s]

Episode 7951/8000, real env return = -99.75


100%|████████████████████████████████████▊| 7961/8000 [1:57:41<00:34,  1.12it/s]

Episode 7961/8000, real env return = -99.50


100%|████████████████████████████████████▊| 7971/8000 [1:57:51<00:26,  1.12it/s]

Episode 7971/8000, real env return = -106.34


100%|████████████████████████████████████▉| 7981/8000 [1:58:00<00:17,  1.12it/s]

Episode 7981/8000, real env return = -106.30


100%|████████████████████████████████████▉| 7991/8000 [1:58:09<00:07,  1.13it/s]

Episode 7991/8000, real env return = -106.69


100%|█████████████████████████████████████| 8000/8000 [1:58:16<00:00,  1.13it/s]

Training finished.





In [6]:
train4 = main()

Using cuda device


  0%|                                          | 2/8000 [00:00<07:48, 17.08it/s]

Episode 1/8000, real env return = -103.64


  0%|                                       | 11/8000 [00:10<2:41:47,  1.22s/it]

Episode 11/8000, real env return = -79.79


  0%|                                       | 21/8000 [00:20<2:02:49,  1.08it/s]

Episode 21/8000, real env return = -116.67


  0%|▏                                      | 31/8000 [00:33<2:32:22,  1.15s/it]

Episode 31/8000, real env return = -109.32


  1%|▏                                      | 41/8000 [00:46<3:00:08,  1.36s/it]

Episode 41/8000, real env return = -103.19


  1%|▏                                      | 51/8000 [01:00<3:35:00,  1.62s/it]

Episode 51/8000, real env return = -108.31


  1%|▎                                      | 61/8000 [01:17<4:17:04,  1.94s/it]

Episode 61/8000, real env return = -98.39


  1%|▎                                      | 71/8000 [01:36<3:45:46,  1.71s/it]

Episode 71/8000, real env return = -62.53


  1%|▍                                      | 81/8000 [01:51<3:00:33,  1.37s/it]

Episode 81/8000, real env return = -168.73


  1%|▍                                      | 91/8000 [02:05<3:21:46,  1.53s/it]

Episode 91/8000, real env return = -125.63


  1%|▍                                     | 101/8000 [02:18<2:56:35,  1.34s/it]

Episode 101/8000, real env return = -90.46


  1%|▌                                     | 111/8000 [02:38<4:31:37,  2.07s/it]

Episode 111/8000, real env return = -93.98


  2%|▌                                     | 121/8000 [02:59<4:36:08,  2.10s/it]

Episode 121/8000, real env return = -146.89


  2%|▌                                     | 131/8000 [03:19<4:13:50,  1.94s/it]

Episode 131/8000, real env return = -91.34


  2%|▋                                     | 141/8000 [03:39<4:35:02,  2.10s/it]

Episode 141/8000, real env return = -142.44


  2%|▋                                     | 151/8000 [03:56<3:25:33,  1.57s/it]

Episode 151/8000, real env return = -112.80


  2%|▊                                     | 161/8000 [04:10<3:25:38,  1.57s/it]

Episode 161/8000, real env return = -141.84


  2%|▊                                     | 171/8000 [04:19<2:01:59,  1.07it/s]

Episode 171/8000, real env return = -118.03


  2%|▊                                     | 181/8000 [04:27<1:57:29,  1.11it/s]

Episode 181/8000, real env return = -116.31


  2%|▉                                     | 191/8000 [04:42<3:49:31,  1.76s/it]

Episode 191/8000, real env return = -104.81


  3%|▉                                     | 201/8000 [04:54<2:11:41,  1.01s/it]

Episode 201/8000, real env return = -111.02


  3%|█                                     | 211/8000 [05:07<3:12:18,  1.48s/it]

Episode 211/8000, real env return = -99.44


  3%|█                                     | 221/8000 [05:17<2:07:31,  1.02it/s]

Episode 221/8000, real env return = -105.10


  3%|█                                     | 231/8000 [05:38<4:28:14,  2.07s/it]

Episode 231/8000, real env return = -164.98


  3%|█▏                                    | 241/8000 [05:57<4:31:49,  2.10s/it]

Episode 241/8000, real env return = -152.19


  3%|█▏                                    | 251/8000 [06:19<4:33:41,  2.12s/it]

Episode 251/8000, real env return = -117.46


  3%|█▏                                    | 261/8000 [06:30<2:07:37,  1.01it/s]

Episode 261/8000, real env return = -110.29


  3%|█▎                                    | 271/8000 [06:40<1:54:35,  1.12it/s]

Episode 271/8000, real env return = -102.71


  4%|█▎                                    | 281/8000 [06:49<1:55:03,  1.12it/s]

Episode 281/8000, real env return = -101.89


  4%|█▍                                    | 291/8000 [07:01<3:31:25,  1.65s/it]

Episode 291/8000, real env return = -144.19


  4%|█▍                                    | 301/8000 [07:22<4:28:20,  2.09s/it]

Episode 301/8000, real env return = -157.30


  4%|█▍                                    | 311/8000 [07:38<2:45:40,  1.29s/it]

Episode 311/8000, real env return = -103.18


  4%|█▌                                    | 321/8000 [07:47<2:37:04,  1.23s/it]

Episode 321/8000, real env return = -132.39


  4%|█▌                                    | 331/8000 [07:56<1:53:55,  1.12it/s]

Episode 331/8000, real env return = -102.08


  4%|█▌                                    | 341/8000 [08:05<1:54:36,  1.11it/s]

Episode 341/8000, real env return = -101.13


  4%|█▋                                    | 351/8000 [08:15<1:50:25,  1.15it/s]

Episode 351/8000, real env return = -101.46


  5%|█▋                                    | 361/8000 [08:25<1:57:43,  1.08it/s]

Episode 361/8000, real env return = -104.43


  5%|█▊                                    | 371/8000 [08:33<1:51:11,  1.14it/s]

Episode 371/8000, real env return = -101.59


  5%|█▊                                    | 381/8000 [08:43<1:51:42,  1.14it/s]

Episode 381/8000, real env return = -102.82


  5%|█▊                                    | 391/8000 [08:52<1:47:50,  1.18it/s]

Episode 391/8000, real env return = -109.11


  5%|█▉                                    | 401/8000 [09:01<1:52:56,  1.12it/s]

Episode 401/8000, real env return = -119.56


  5%|█▉                                    | 411/8000 [09:10<1:52:05,  1.13it/s]

Episode 411/8000, real env return = -114.79


  5%|█▉                                    | 421/8000 [09:18<1:50:04,  1.15it/s]

Episode 421/8000, real env return = -114.29


  5%|██                                    | 431/8000 [09:27<1:51:14,  1.13it/s]

Episode 431/8000, real env return = -105.21


  6%|██                                    | 441/8000 [09:36<1:54:25,  1.10it/s]

Episode 441/8000, real env return = -102.53


  6%|██▏                                   | 451/8000 [09:45<1:49:49,  1.15it/s]

Episode 451/8000, real env return = -111.94


  6%|██▏                                   | 461/8000 [09:53<1:48:57,  1.15it/s]

Episode 461/8000, real env return = -106.84


  6%|██▏                                   | 471/8000 [10:02<1:45:58,  1.18it/s]

Episode 471/8000, real env return = -102.89


  6%|██▎                                   | 481/8000 [10:10<1:46:05,  1.18it/s]

Episode 481/8000, real env return = -109.53


  6%|██▎                                   | 491/8000 [10:19<1:46:17,  1.18it/s]

Episode 491/8000, real env return = -111.96


  6%|██▍                                   | 501/8000 [10:27<1:44:59,  1.19it/s]

Episode 501/8000, real env return = -110.85


  6%|██▍                                   | 511/8000 [10:36<1:46:02,  1.18it/s]

Episode 511/8000, real env return = -110.16


  7%|██▍                                   | 521/8000 [10:45<1:45:43,  1.18it/s]

Episode 521/8000, real env return = -111.33


  7%|██▌                                   | 531/8000 [10:53<1:46:35,  1.17it/s]

Episode 531/8000, real env return = -110.71


  7%|██▌                                   | 541/8000 [11:02<1:44:52,  1.19it/s]

Episode 541/8000, real env return = -109.72


  7%|██▌                                   | 551/8000 [11:10<1:45:20,  1.18it/s]

Episode 551/8000, real env return = -109.04


  7%|██▋                                   | 561/8000 [11:19<1:50:00,  1.13it/s]

Episode 561/8000, real env return = -111.00


  7%|██▋                                   | 571/8000 [11:28<1:43:44,  1.19it/s]

Episode 571/8000, real env return = -110.38


  7%|██▊                                   | 581/8000 [11:36<1:45:58,  1.17it/s]

Episode 581/8000, real env return = -109.94


  7%|██▊                                   | 591/8000 [11:45<1:49:05,  1.13it/s]

Episode 591/8000, real env return = -109.61


  8%|██▊                                   | 601/8000 [11:54<1:51:21,  1.11it/s]

Episode 601/8000, real env return = -110.42


  8%|██▉                                   | 611/8000 [12:02<1:44:37,  1.18it/s]

Episode 611/8000, real env return = -110.18


  8%|██▉                                   | 621/8000 [12:11<1:45:49,  1.16it/s]

Episode 621/8000, real env return = -110.04


  8%|██▉                                   | 631/8000 [12:19<1:45:11,  1.17it/s]

Episode 631/8000, real env return = -110.03


  8%|███                                   | 641/8000 [12:28<1:47:35,  1.14it/s]

Episode 641/8000, real env return = -110.14


  8%|███                                   | 651/8000 [12:36<1:46:35,  1.15it/s]

Episode 651/8000, real env return = -110.05


  8%|███▏                                  | 661/8000 [12:45<1:42:36,  1.19it/s]

Episode 661/8000, real env return = -110.55


  8%|███▏                                  | 671/8000 [12:53<1:43:07,  1.18it/s]

Episode 671/8000, real env return = -111.56


  9%|███▏                                  | 681/8000 [13:02<1:46:07,  1.15it/s]

Episode 681/8000, real env return = -110.12


  9%|███▎                                  | 691/8000 [13:11<1:44:12,  1.17it/s]

Episode 691/8000, real env return = -104.44


  9%|███▎                                  | 701/8000 [13:19<1:40:59,  1.20it/s]

Episode 701/8000, real env return = -107.63


  9%|███▍                                  | 711/8000 [13:28<1:45:56,  1.15it/s]

Episode 711/8000, real env return = -109.82


  9%|███▍                                  | 721/8000 [13:36<1:44:58,  1.16it/s]

Episode 721/8000, real env return = -109.37


  9%|███▍                                  | 731/8000 [13:45<1:50:00,  1.10it/s]

Episode 731/8000, real env return = -101.16


  9%|███▌                                  | 741/8000 [13:53<1:41:56,  1.19it/s]

Episode 741/8000, real env return = -109.90


  9%|███▌                                  | 751/8000 [14:02<1:40:46,  1.20it/s]

Episode 751/8000, real env return = -109.26


 10%|███▌                                  | 761/8000 [14:11<1:41:37,  1.19it/s]

Episode 761/8000, real env return = -109.56


 10%|███▋                                  | 771/8000 [14:19<1:45:06,  1.15it/s]

Episode 771/8000, real env return = -108.98


 10%|███▋                                  | 781/8000 [14:28<1:44:09,  1.16it/s]

Episode 781/8000, real env return = -108.74


 10%|███▊                                  | 791/8000 [14:36<1:46:43,  1.13it/s]

Episode 791/8000, real env return = -109.51


 10%|███▊                                  | 801/8000 [14:45<1:44:03,  1.15it/s]

Episode 801/8000, real env return = -109.37


 10%|███▊                                  | 811/8000 [14:53<1:39:21,  1.21it/s]

Episode 811/8000, real env return = -108.78


 10%|███▉                                  | 821/8000 [15:02<1:43:47,  1.15it/s]

Episode 821/8000, real env return = -111.83


 10%|███▉                                  | 831/8000 [15:10<1:41:24,  1.18it/s]

Episode 831/8000, real env return = -109.84


 11%|███▉                                  | 841/8000 [15:19<1:40:30,  1.19it/s]

Episode 841/8000, real env return = -110.45


 11%|████                                  | 851/8000 [15:28<1:42:32,  1.16it/s]

Episode 851/8000, real env return = -110.16


 11%|████                                  | 861/8000 [15:37<1:44:07,  1.14it/s]

Episode 861/8000, real env return = -110.89


 11%|████▏                                 | 871/8000 [15:46<1:44:34,  1.14it/s]

Episode 871/8000, real env return = -110.48


 11%|████▏                                 | 881/8000 [15:54<1:39:44,  1.19it/s]

Episode 881/8000, real env return = -110.39


 11%|████▏                                 | 891/8000 [16:03<1:41:04,  1.17it/s]

Episode 891/8000, real env return = -109.87


 11%|████▎                                 | 901/8000 [16:11<1:39:56,  1.18it/s]

Episode 901/8000, real env return = -109.46


 11%|████▎                                 | 911/8000 [16:20<1:41:57,  1.16it/s]

Episode 911/8000, real env return = -110.15


 12%|████▎                                 | 921/8000 [16:28<1:41:24,  1.16it/s]

Episode 921/8000, real env return = -109.38


 12%|████▍                                 | 931/8000 [16:37<1:38:33,  1.20it/s]

Episode 931/8000, real env return = -112.10


 12%|████▍                                 | 941/8000 [16:45<1:44:17,  1.13it/s]

Episode 941/8000, real env return = -109.99


 12%|████▌                                 | 951/8000 [16:54<1:40:02,  1.17it/s]

Episode 951/8000, real env return = -110.70


 12%|████▌                                 | 961/8000 [17:02<1:40:16,  1.17it/s]

Episode 961/8000, real env return = -114.44


 12%|████▌                                 | 971/8000 [17:11<1:41:15,  1.16it/s]

Episode 971/8000, real env return = -110.46


 12%|████▋                                 | 981/8000 [17:20<1:37:26,  1.20it/s]

Episode 981/8000, real env return = -109.16


 12%|████▋                                 | 991/8000 [17:28<1:38:05,  1.19it/s]

Episode 991/8000, real env return = -110.45


 13%|████▋                                | 1001/8000 [17:37<1:42:47,  1.13it/s]

Episode 1001/8000, real env return = -111.78


 13%|████▋                                | 1011/8000 [17:45<1:36:43,  1.20it/s]

Episode 1011/8000, real env return = -111.83


 13%|████▋                                | 1021/8000 [17:54<1:39:24,  1.17it/s]

Episode 1021/8000, real env return = -112.50


 13%|████▊                                | 1031/8000 [18:02<1:39:54,  1.16it/s]

Episode 1031/8000, real env return = -110.41


 13%|████▊                                | 1041/8000 [18:11<1:38:07,  1.18it/s]

Episode 1041/8000, real env return = -111.71


 13%|████▊                                | 1051/8000 [18:19<1:38:05,  1.18it/s]

Episode 1051/8000, real env return = -110.61


 13%|████▉                                | 1061/8000 [18:28<1:37:17,  1.19it/s]

Episode 1061/8000, real env return = -106.37


 13%|████▉                                | 1071/8000 [18:36<1:41:45,  1.13it/s]

Episode 1071/8000, real env return = -110.08


 14%|████▉                                | 1081/8000 [18:45<1:34:14,  1.22it/s]

Episode 1081/8000, real env return = -109.29


 14%|█████                                | 1091/8000 [18:53<1:40:07,  1.15it/s]

Episode 1091/8000, real env return = -110.25


 14%|█████                                | 1101/8000 [19:02<1:37:18,  1.18it/s]

Episode 1101/8000, real env return = -109.92


 14%|█████▏                               | 1111/8000 [19:10<1:36:01,  1.20it/s]

Episode 1111/8000, real env return = -109.58


 14%|█████▏                               | 1121/8000 [19:19<1:36:40,  1.19it/s]

Episode 1121/8000, real env return = -111.92


 14%|█████▏                               | 1131/8000 [19:27<1:35:26,  1.20it/s]

Episode 1131/8000, real env return = -110.75


 14%|█████▎                               | 1141/8000 [19:36<1:39:21,  1.15it/s]

Episode 1141/8000, real env return = -110.27


 14%|█████▎                               | 1151/8000 [19:45<1:39:27,  1.15it/s]

Episode 1151/8000, real env return = -113.42


 15%|█████▎                               | 1161/8000 [19:53<1:34:47,  1.20it/s]

Episode 1161/8000, real env return = -110.23


 15%|█████▍                               | 1171/8000 [20:02<1:38:17,  1.16it/s]

Episode 1171/8000, real env return = -100.13


 15%|█████▍                               | 1181/8000 [20:10<1:39:30,  1.14it/s]

Episode 1181/8000, real env return = -101.41


 15%|█████▌                               | 1191/8000 [20:19<1:36:16,  1.18it/s]

Episode 1191/8000, real env return = -101.48


 15%|█████▌                               | 1201/8000 [20:27<1:35:03,  1.19it/s]

Episode 1201/8000, real env return = -110.22


 15%|█████▌                               | 1211/8000 [20:36<1:36:22,  1.17it/s]

Episode 1211/8000, real env return = -103.44


 15%|█████▋                               | 1221/8000 [20:44<1:35:47,  1.18it/s]

Episode 1221/8000, real env return = -101.42


 15%|█████▋                               | 1231/8000 [20:53<1:35:04,  1.19it/s]

Episode 1231/8000, real env return = -101.80


 16%|█████▋                               | 1241/8000 [21:01<1:36:55,  1.16it/s]

Episode 1241/8000, real env return = -111.49


 16%|█████▊                               | 1251/8000 [21:10<1:34:42,  1.19it/s]

Episode 1251/8000, real env return = -102.25


 16%|█████▊                               | 1261/8000 [21:19<1:38:08,  1.14it/s]

Episode 1261/8000, real env return = -101.71


 16%|█████▉                               | 1271/8000 [21:27<1:34:44,  1.18it/s]

Episode 1271/8000, real env return = -101.08


 16%|█████▉                               | 1281/8000 [21:36<1:35:42,  1.17it/s]

Episode 1281/8000, real env return = -111.16


 16%|█████▉                               | 1291/8000 [21:44<1:35:54,  1.17it/s]

Episode 1291/8000, real env return = -104.21


 16%|██████                               | 1301/8000 [21:53<1:37:13,  1.15it/s]

Episode 1301/8000, real env return = -101.21


 16%|██████                               | 1311/8000 [22:01<1:34:45,  1.18it/s]

Episode 1311/8000, real env return = -101.09


 17%|██████                               | 1321/8000 [22:10<1:33:39,  1.19it/s]

Episode 1321/8000, real env return = -108.13


 17%|██████▏                              | 1331/8000 [22:18<1:34:19,  1.18it/s]

Episode 1331/8000, real env return = -103.97


 17%|██████▏                              | 1341/8000 [22:27<1:35:39,  1.16it/s]

Episode 1341/8000, real env return = -103.24


 17%|██████▏                              | 1351/8000 [22:36<1:40:01,  1.11it/s]

Episode 1351/8000, real env return = -100.95


 17%|██████▎                              | 1361/8000 [22:44<1:38:45,  1.12it/s]

Episode 1361/8000, real env return = -110.61


 17%|██████▎                              | 1371/8000 [22:53<1:35:16,  1.16it/s]

Episode 1371/8000, real env return = -102.06


 17%|██████▍                              | 1381/8000 [23:02<1:35:51,  1.15it/s]

Episode 1381/8000, real env return = -101.91


 17%|██████▍                              | 1391/8000 [23:10<1:33:00,  1.18it/s]

Episode 1391/8000, real env return = -102.10


 18%|██████▍                              | 1401/8000 [23:18<1:30:30,  1.22it/s]

Episode 1401/8000, real env return = -111.65


 18%|██████▌                              | 1411/8000 [23:27<1:32:37,  1.19it/s]

Episode 1411/8000, real env return = -101.79


 18%|██████▌                              | 1421/8000 [23:36<1:32:43,  1.18it/s]

Episode 1421/8000, real env return = -101.58


 18%|██████▌                              | 1431/8000 [23:44<1:32:55,  1.18it/s]

Episode 1431/8000, real env return = -101.97


 18%|██████▋                              | 1441/8000 [23:53<1:34:48,  1.15it/s]

Episode 1441/8000, real env return = -111.53


 18%|██████▋                              | 1451/8000 [24:01<1:32:31,  1.18it/s]

Episode 1451/8000, real env return = -102.24


 18%|██████▊                              | 1461/8000 [24:10<1:35:26,  1.14it/s]

Episode 1461/8000, real env return = -101.87


 18%|██████▊                              | 1471/8000 [24:19<1:33:17,  1.17it/s]

Episode 1471/8000, real env return = -101.95


 19%|██████▊                              | 1481/8000 [24:27<1:31:55,  1.18it/s]

Episode 1481/8000, real env return = -111.72


 19%|██████▉                              | 1491/8000 [24:36<1:29:58,  1.21it/s]

Episode 1491/8000, real env return = -110.24


 19%|██████▉                              | 1501/8000 [24:44<1:31:01,  1.19it/s]

Episode 1501/8000, real env return = -111.50


 19%|██████▉                              | 1511/8000 [24:53<1:32:13,  1.17it/s]

Episode 1511/8000, real env return = -110.76


 19%|███████                              | 1521/8000 [25:01<1:30:03,  1.20it/s]

Episode 1521/8000, real env return = -109.68


 19%|███████                              | 1531/8000 [25:10<1:32:22,  1.17it/s]

Episode 1531/8000, real env return = -109.43


 19%|███████▏                             | 1541/8000 [25:18<1:31:02,  1.18it/s]

Episode 1541/8000, real env return = -109.77


 19%|███████▏                             | 1551/8000 [25:27<1:32:36,  1.16it/s]

Episode 1551/8000, real env return = -110.13


 20%|███████▏                             | 1561/8000 [25:35<1:32:55,  1.15it/s]

Episode 1561/8000, real env return = -106.56


 20%|███████▎                             | 1571/8000 [25:44<1:31:29,  1.17it/s]

Episode 1571/8000, real env return = -110.10


 20%|███████▎                             | 1581/8000 [25:52<1:33:06,  1.15it/s]

Episode 1581/8000, real env return = -109.18


 20%|███████▎                             | 1591/8000 [26:01<1:29:34,  1.19it/s]

Episode 1591/8000, real env return = -107.65


 20%|███████▍                             | 1601/8000 [26:09<1:30:58,  1.17it/s]

Episode 1601/8000, real env return = -109.63


 20%|███████▍                             | 1611/8000 [26:18<1:28:24,  1.20it/s]

Episode 1611/8000, real env return = -107.42


 20%|███████▍                             | 1621/8000 [26:27<1:30:07,  1.18it/s]

Episode 1621/8000, real env return = -114.80


 20%|███████▌                             | 1631/8000 [26:35<1:29:50,  1.18it/s]

Episode 1631/8000, real env return = -110.09


 21%|███████▌                             | 1641/8000 [26:44<1:31:05,  1.16it/s]

Episode 1641/8000, real env return = -111.18


 21%|███████▋                             | 1651/8000 [26:52<1:28:22,  1.20it/s]

Episode 1651/8000, real env return = -109.74


 21%|███████▋                             | 1661/8000 [27:01<1:28:37,  1.19it/s]

Episode 1661/8000, real env return = -109.51


 21%|███████▋                             | 1671/8000 [27:09<1:30:20,  1.17it/s]

Episode 1671/8000, real env return = -112.37


 21%|███████▊                             | 1681/8000 [27:18<1:30:18,  1.17it/s]

Episode 1681/8000, real env return = -110.22


 21%|███████▊                             | 1691/8000 [27:26<1:28:56,  1.18it/s]

Episode 1691/8000, real env return = -118.29


 21%|███████▊                             | 1701/8000 [27:35<1:29:48,  1.17it/s]

Episode 1701/8000, real env return = -111.28


 21%|███████▉                             | 1711/8000 [27:43<1:28:03,  1.19it/s]

Episode 1711/8000, real env return = -111.93


 22%|███████▉                             | 1721/8000 [27:52<1:31:31,  1.14it/s]

Episode 1721/8000, real env return = -111.30


 22%|████████                             | 1731/8000 [28:00<1:26:20,  1.21it/s]

Episode 1731/8000, real env return = -103.51


 22%|████████                             | 1741/8000 [28:09<1:28:16,  1.18it/s]

Episode 1741/8000, real env return = -110.12


 22%|████████                             | 1751/8000 [28:17<1:28:33,  1.18it/s]

Episode 1751/8000, real env return = -111.57


 22%|████████▏                            | 1761/8000 [28:26<1:32:35,  1.12it/s]

Episode 1761/8000, real env return = -106.61


 22%|████████▏                            | 1771/8000 [28:35<1:26:42,  1.20it/s]

Episode 1771/8000, real env return = -108.31


 22%|████████▏                            | 1781/8000 [28:43<1:26:22,  1.20it/s]

Episode 1781/8000, real env return = -107.65


 22%|████████▎                            | 1791/8000 [28:52<1:26:57,  1.19it/s]

Episode 1791/8000, real env return = -110.16


 23%|████████▎                            | 1801/8000 [29:00<1:27:09,  1.19it/s]

Episode 1801/8000, real env return = -108.57


 23%|████████▍                            | 1811/8000 [29:09<1:31:56,  1.12it/s]

Episode 1811/8000, real env return = -110.42


 23%|████████▍                            | 1821/8000 [29:17<1:26:13,  1.19it/s]

Episode 1821/8000, real env return = -109.93


 23%|████████▍                            | 1831/8000 [29:26<1:30:43,  1.13it/s]

Episode 1831/8000, real env return = -111.51


 23%|████████▌                            | 1841/8000 [29:34<1:26:46,  1.18it/s]

Episode 1841/8000, real env return = -111.31


 23%|████████▌                            | 1851/8000 [29:43<1:28:07,  1.16it/s]

Episode 1851/8000, real env return = -109.83


 23%|████████▌                            | 1861/8000 [29:51<1:30:50,  1.13it/s]

Episode 1861/8000, real env return = -111.92


 23%|████████▋                            | 1871/8000 [30:00<1:27:10,  1.17it/s]

Episode 1871/8000, real env return = -107.99


 24%|████████▋                            | 1881/8000 [30:08<1:25:17,  1.20it/s]

Episode 1881/8000, real env return = -112.15


 24%|████████▋                            | 1891/8000 [30:17<1:29:32,  1.14it/s]

Episode 1891/8000, real env return = -103.14


 24%|████████▊                            | 1901/8000 [30:25<1:30:15,  1.13it/s]

Episode 1901/8000, real env return = -103.31


 24%|████████▊                            | 1911/8000 [30:34<1:26:31,  1.17it/s]

Episode 1911/8000, real env return = -102.91


 24%|████████▉                            | 1921/8000 [30:42<1:25:44,  1.18it/s]

Episode 1921/8000, real env return = -103.91


 24%|████████▉                            | 1931/8000 [30:51<1:30:24,  1.12it/s]

Episode 1931/8000, real env return = -104.28


 24%|████████▉                            | 1941/8000 [31:00<1:26:45,  1.16it/s]

Episode 1941/8000, real env return = -104.34


 24%|█████████                            | 1951/8000 [31:08<1:24:33,  1.19it/s]

Episode 1951/8000, real env return = -104.77


 25%|█████████                            | 1961/8000 [31:17<1:25:26,  1.18it/s]

Episode 1961/8000, real env return = -104.43


 25%|█████████                            | 1971/8000 [31:25<1:26:03,  1.17it/s]

Episode 1971/8000, real env return = -100.86


 25%|█████████▏                           | 1981/8000 [31:34<1:23:55,  1.20it/s]

Episode 1981/8000, real env return = -115.91


 25%|█████████▏                           | 1991/8000 [31:43<1:23:55,  1.19it/s]

Episode 1991/8000, real env return = -111.01


 25%|█████████▎                           | 2001/8000 [31:51<1:23:24,  1.20it/s]

Episode 2001/8000, real env return = -106.93


 25%|█████████▎                           | 2011/8000 [32:00<1:25:51,  1.16it/s]

Episode 2011/8000, real env return = -106.24


 25%|█████████▎                           | 2021/8000 [32:08<1:23:03,  1.20it/s]

Episode 2021/8000, real env return = -108.02


 25%|█████████▍                           | 2031/8000 [32:16<1:22:51,  1.20it/s]

Episode 2031/8000, real env return = -106.39


 26%|█████████▍                           | 2041/8000 [32:25<1:26:03,  1.15it/s]

Episode 2041/8000, real env return = -105.14


 26%|█████████▍                           | 2051/8000 [32:34<1:26:54,  1.14it/s]

Episode 2051/8000, real env return = -104.99


 26%|█████████▌                           | 2061/8000 [32:43<1:36:42,  1.02it/s]

Episode 2061/8000, real env return = -104.99


 26%|█████████▌                           | 2071/8000 [32:52<1:25:12,  1.16it/s]

Episode 2071/8000, real env return = -105.20


 26%|█████████▌                           | 2081/8000 [33:00<1:23:01,  1.19it/s]

Episode 2081/8000, real env return = -113.46


 26%|█████████▋                           | 2091/8000 [33:09<1:25:51,  1.15it/s]

Episode 2091/8000, real env return = -111.20


 26%|█████████▋                           | 2101/8000 [33:18<1:23:11,  1.18it/s]

Episode 2101/8000, real env return = -110.31


 26%|█████████▊                           | 2111/8000 [33:26<1:26:38,  1.13it/s]

Episode 2111/8000, real env return = -104.94


 27%|█████████▊                           | 2121/8000 [33:35<1:20:05,  1.22it/s]

Episode 2121/8000, real env return = -111.72


 27%|█████████▊                           | 2131/8000 [33:43<1:21:18,  1.20it/s]

Episode 2131/8000, real env return = -100.79


 27%|█████████▉                           | 2141/8000 [33:52<1:22:29,  1.18it/s]

Episode 2141/8000, real env return = -110.13


 27%|█████████▉                           | 2151/8000 [34:01<1:26:05,  1.13it/s]

Episode 2151/8000, real env return = -105.68


 27%|█████████▉                           | 2161/8000 [34:09<1:25:17,  1.14it/s]

Episode 2161/8000, real env return = -105.01


 27%|██████████                           | 2171/8000 [34:18<1:22:24,  1.18it/s]

Episode 2171/8000, real env return = -105.67


 27%|██████████                           | 2181/8000 [34:26<1:21:58,  1.18it/s]

Episode 2181/8000, real env return = -105.64


 27%|██████████▏                          | 2191/8000 [34:35<1:20:07,  1.21it/s]

Episode 2191/8000, real env return = -105.04


 28%|██████████▏                          | 2201/8000 [34:43<1:23:37,  1.16it/s]

Episode 2201/8000, real env return = -109.78


 28%|██████████▏                          | 2211/8000 [34:52<1:22:20,  1.17it/s]

Episode 2211/8000, real env return = -101.27


 28%|██████████▎                          | 2221/8000 [35:01<1:24:19,  1.14it/s]

Episode 2221/8000, real env return = -109.50


 28%|██████████▎                          | 2231/8000 [35:09<1:23:40,  1.15it/s]

Episode 2231/8000, real env return = -109.43


 28%|██████████▎                          | 2241/8000 [35:18<1:22:01,  1.17it/s]

Episode 2241/8000, real env return = -110.74


 28%|██████████▍                          | 2251/8000 [35:26<1:21:34,  1.17it/s]

Episode 2251/8000, real env return = -100.98


 28%|██████████▍                          | 2261/8000 [35:35<1:22:11,  1.16it/s]

Episode 2261/8000, real env return = -108.86


 28%|██████████▌                          | 2271/8000 [35:43<1:22:03,  1.16it/s]

Episode 2271/8000, real env return = -110.19


 29%|██████████▌                          | 2281/8000 [35:52<1:23:32,  1.14it/s]

Episode 2281/8000, real env return = -109.94


 29%|██████████▌                          | 2291/8000 [36:00<1:21:03,  1.17it/s]

Episode 2291/8000, real env return = -101.89


 29%|██████████▋                          | 2301/8000 [36:09<1:20:58,  1.17it/s]

Episode 2301/8000, real env return = -110.13


 29%|██████████▋                          | 2311/8000 [36:18<1:22:01,  1.16it/s]

Episode 2311/8000, real env return = -111.12


 29%|██████████▋                          | 2321/8000 [36:26<1:23:09,  1.14it/s]

Episode 2321/8000, real env return = -107.91


 29%|██████████▊                          | 2331/8000 [36:35<1:19:53,  1.18it/s]

Episode 2331/8000, real env return = -106.85


 29%|██████████▊                          | 2341/8000 [36:43<1:20:06,  1.18it/s]

Episode 2341/8000, real env return = -101.59


 29%|██████████▊                          | 2351/8000 [36:52<1:19:14,  1.19it/s]

Episode 2351/8000, real env return = -102.63


 30%|██████████▉                          | 2361/8000 [37:00<1:22:43,  1.14it/s]

Episode 2361/8000, real env return = -103.75


 30%|██████████▉                          | 2371/8000 [37:09<1:20:49,  1.16it/s]

Episode 2371/8000, real env return = -107.12


 30%|███████████                          | 2381/8000 [37:17<1:17:25,  1.21it/s]

Episode 2381/8000, real env return = -108.86


 30%|███████████                          | 2391/8000 [37:25<1:17:47,  1.20it/s]

Episode 2391/8000, real env return = -107.74


 30%|███████████                          | 2401/8000 [37:34<1:21:47,  1.14it/s]

Episode 2401/8000, real env return = -107.01


 30%|███████████▏                         | 2411/8000 [37:42<1:17:47,  1.20it/s]

Episode 2411/8000, real env return = -106.42


 30%|███████████▏                         | 2421/8000 [37:51<1:18:41,  1.18it/s]

Episode 2421/8000, real env return = -104.77


 30%|███████████▏                         | 2431/8000 [37:59<1:21:25,  1.14it/s]

Episode 2431/8000, real env return = -107.71


 31%|███████████▎                         | 2441/8000 [38:08<1:17:32,  1.19it/s]

Episode 2441/8000, real env return = -111.89


 31%|███████████▎                         | 2451/8000 [38:16<1:18:34,  1.18it/s]

Episode 2451/8000, real env return = -101.90


 31%|███████████▍                         | 2461/8000 [38:25<1:17:26,  1.19it/s]

Episode 2461/8000, real env return = -102.19


 31%|███████████▍                         | 2471/8000 [38:33<1:19:13,  1.16it/s]

Episode 2471/8000, real env return = -112.28


 31%|███████████▍                         | 2481/8000 [38:42<1:16:57,  1.20it/s]

Episode 2481/8000, real env return = -109.63


 31%|███████████▌                         | 2491/8000 [38:50<1:19:55,  1.15it/s]

Episode 2491/8000, real env return = -101.74


 31%|███████████▌                         | 2501/8000 [38:59<1:16:21,  1.20it/s]

Episode 2501/8000, real env return = -102.49


 31%|███████████▌                         | 2511/8000 [39:08<1:18:15,  1.17it/s]

Episode 2511/8000, real env return = -112.84


 32%|███████████▋                         | 2521/8000 [39:16<1:16:56,  1.19it/s]

Episode 2521/8000, real env return = -109.72


 32%|███████████▋                         | 2531/8000 [39:25<1:17:01,  1.18it/s]

Episode 2531/8000, real env return = -100.66


 32%|███████████▊                         | 2541/8000 [39:33<1:16:37,  1.19it/s]

Episode 2541/8000, real env return = -101.90


 32%|███████████▊                         | 2551/8000 [39:42<1:19:36,  1.14it/s]

Episode 2551/8000, real env return = -111.76


 32%|███████████▊                         | 2561/8000 [39:51<1:17:42,  1.17it/s]

Episode 2561/8000, real env return = -107.61


 32%|███████████▉                         | 2571/8000 [39:59<1:14:22,  1.22it/s]

Episode 2571/8000, real env return = -106.69


 32%|███████████▉                         | 2581/8000 [40:07<1:16:58,  1.17it/s]

Episode 2581/8000, real env return = -107.97


 32%|███████████▉                         | 2591/8000 [40:16<1:16:13,  1.18it/s]

Episode 2591/8000, real env return = -108.35


 33%|████████████                         | 2601/8000 [40:24<1:16:16,  1.18it/s]

Episode 2601/8000, real env return = -106.96


 33%|████████████                         | 2611/8000 [40:33<1:14:53,  1.20it/s]

Episode 2611/8000, real env return = -107.10


 33%|████████████                         | 2621/8000 [40:42<1:17:58,  1.15it/s]

Episode 2621/8000, real env return = -107.18


 33%|████████████▏                        | 2631/8000 [40:50<1:15:09,  1.19it/s]

Episode 2631/8000, real env return = -106.86


 33%|████████████▏                        | 2641/8000 [40:58<1:15:34,  1.18it/s]

Episode 2641/8000, real env return = -106.79


 33%|████████████▎                        | 2651/8000 [41:07<1:17:42,  1.15it/s]

Episode 2651/8000, real env return = -107.13


 33%|████████████▎                        | 2661/8000 [41:15<1:14:09,  1.20it/s]

Episode 2661/8000, real env return = -106.89


 33%|████████████▎                        | 2671/8000 [41:24<1:13:39,  1.21it/s]

Episode 2671/8000, real env return = -107.57


 34%|████████████▍                        | 2681/8000 [41:33<1:15:42,  1.17it/s]

Episode 2681/8000, real env return = -106.42


 34%|████████████▍                        | 2691/8000 [41:41<1:13:37,  1.20it/s]

Episode 2691/8000, real env return = -107.04


 34%|████████████▍                        | 2701/8000 [41:49<1:13:47,  1.20it/s]

Episode 2701/8000, real env return = -106.20


 34%|████████████▌                        | 2711/8000 [41:58<1:12:56,  1.21it/s]

Episode 2711/8000, real env return = -107.15


 34%|████████████▌                        | 2721/8000 [42:06<1:14:48,  1.18it/s]

Episode 2721/8000, real env return = -106.19


 34%|████████████▋                        | 2731/8000 [42:15<1:13:55,  1.19it/s]

Episode 2731/8000, real env return = -106.66


 34%|████████████▋                        | 2741/8000 [42:24<1:16:57,  1.14it/s]

Episode 2741/8000, real env return = -107.06


 34%|████████████▋                        | 2751/8000 [42:32<1:17:59,  1.12it/s]

Episode 2751/8000, real env return = -106.75


 35%|████████████▊                        | 2761/8000 [42:41<1:15:53,  1.15it/s]

Episode 2761/8000, real env return = -105.94


 35%|████████████▊                        | 2771/8000 [42:49<1:11:22,  1.22it/s]

Episode 2771/8000, real env return = -106.52


 35%|████████████▊                        | 2781/8000 [42:57<1:14:13,  1.17it/s]

Episode 2781/8000, real env return = -105.87


 35%|████████████▉                        | 2791/8000 [43:06<1:13:55,  1.17it/s]

Episode 2791/8000, real env return = -107.16


 35%|████████████▉                        | 2801/8000 [43:14<1:12:48,  1.19it/s]

Episode 2801/8000, real env return = -106.93


 35%|█████████████                        | 2811/8000 [43:23<1:10:35,  1.23it/s]

Episode 2811/8000, real env return = -107.09


 35%|█████████████                        | 2821/8000 [43:31<1:11:58,  1.20it/s]

Episode 2821/8000, real env return = -108.20


 35%|█████████████                        | 2831/8000 [43:40<1:11:23,  1.21it/s]

Episode 2831/8000, real env return = -106.20


 36%|█████████████▏                       | 2841/8000 [43:48<1:10:49,  1.21it/s]

Episode 2841/8000, real env return = -106.71


 36%|█████████████▏                       | 2851/8000 [43:57<1:17:16,  1.11it/s]

Episode 2851/8000, real env return = -107.57


 36%|█████████████▏                       | 2861/8000 [44:06<1:20:54,  1.06it/s]

Episode 2861/8000, real env return = -106.94


 36%|█████████████▎                       | 2871/8000 [44:15<1:15:14,  1.14it/s]

Episode 2871/8000, real env return = -110.38


 36%|█████████████▎                       | 2881/8000 [44:24<1:15:00,  1.14it/s]

Episode 2881/8000, real env return = -106.66


 36%|█████████████▎                       | 2891/8000 [44:33<1:21:41,  1.04it/s]

Episode 2891/8000, real env return = -106.33


 36%|█████████████▍                       | 2901/8000 [44:42<1:19:09,  1.07it/s]

Episode 2901/8000, real env return = -106.49


 36%|█████████████▍                       | 2911/8000 [44:51<1:17:10,  1.10it/s]

Episode 2911/8000, real env return = -106.77


 37%|█████████████▌                       | 2921/8000 [45:00<1:13:26,  1.15it/s]

Episode 2921/8000, real env return = -106.07


 37%|█████████████▌                       | 2931/8000 [45:09<1:15:09,  1.12it/s]

Episode 2931/8000, real env return = -108.26


 37%|█████████████▌                       | 2941/8000 [45:18<1:14:25,  1.13it/s]

Episode 2941/8000, real env return = -106.78


 37%|█████████████▋                       | 2951/8000 [45:27<1:14:05,  1.14it/s]

Episode 2951/8000, real env return = -106.39


 37%|█████████████▋                       | 2961/8000 [45:36<1:13:19,  1.15it/s]

Episode 2961/8000, real env return = -107.26


 37%|█████████████▋                       | 2971/8000 [45:45<1:18:34,  1.07it/s]

Episode 2971/8000, real env return = -106.73


 37%|█████████████▊                       | 2981/8000 [45:54<1:12:38,  1.15it/s]

Episode 2981/8000, real env return = -106.72


 37%|█████████████▊                       | 2991/8000 [46:03<1:12:14,  1.16it/s]

Episode 2991/8000, real env return = -106.47


 38%|█████████████▉                       | 3001/8000 [46:11<1:14:40,  1.12it/s]

Episode 3001/8000, real env return = -106.85


 38%|█████████████▉                       | 3011/8000 [46:20<1:14:45,  1.11it/s]

Episode 3011/8000, real env return = -106.53


 38%|█████████████▉                       | 3021/8000 [46:30<1:17:44,  1.07it/s]

Episode 3021/8000, real env return = -106.22


 38%|██████████████                       | 3031/8000 [46:39<1:14:13,  1.12it/s]

Episode 3031/8000, real env return = -107.16


 38%|██████████████                       | 3041/8000 [46:48<1:11:15,  1.16it/s]

Episode 3041/8000, real env return = -106.24


 38%|██████████████                       | 3051/8000 [46:57<1:19:22,  1.04it/s]

Episode 3051/8000, real env return = -106.85


 38%|██████████████▏                      | 3061/8000 [47:06<1:15:16,  1.09it/s]

Episode 3061/8000, real env return = -107.24


 38%|██████████████▏                      | 3071/8000 [47:14<1:08:46,  1.19it/s]

Episode 3071/8000, real env return = -106.26


 39%|██████████████▏                      | 3081/8000 [47:23<1:09:42,  1.18it/s]

Episode 3081/8000, real env return = -107.31


 39%|██████████████▎                      | 3091/8000 [47:31<1:10:18,  1.16it/s]

Episode 3091/8000, real env return = -106.80


 39%|██████████████▎                      | 3101/8000 [47:40<1:07:21,  1.21it/s]

Episode 3101/8000, real env return = -108.80


 39%|██████████████▍                      | 3111/8000 [47:48<1:10:15,  1.16it/s]

Episode 3111/8000, real env return = -107.12


 39%|██████████████▍                      | 3121/8000 [47:57<1:09:18,  1.17it/s]

Episode 3121/8000, real env return = -106.92


 39%|██████████████▍                      | 3131/8000 [48:06<1:07:14,  1.21it/s]

Episode 3131/8000, real env return = -106.01


 39%|██████████████▌                      | 3141/8000 [48:14<1:11:20,  1.14it/s]

Episode 3141/8000, real env return = -107.14


 39%|██████████████▌                      | 3151/8000 [48:23<1:08:44,  1.18it/s]

Episode 3151/8000, real env return = -106.63


 40%|██████████████▌                      | 3161/8000 [48:32<1:11:10,  1.13it/s]

Episode 3161/8000, real env return = -107.30


 40%|██████████████▋                      | 3171/8000 [48:40<1:11:34,  1.12it/s]

Episode 3171/8000, real env return = -107.05


 40%|██████████████▋                      | 3181/8000 [48:49<1:10:25,  1.14it/s]

Episode 3181/8000, real env return = -106.62


 40%|██████████████▊                      | 3191/8000 [48:57<1:06:50,  1.20it/s]

Episode 3191/8000, real env return = -106.40


 40%|██████████████▊                      | 3201/8000 [49:06<1:06:11,  1.21it/s]

Episode 3201/8000, real env return = -106.65


 40%|██████████████▊                      | 3211/8000 [49:14<1:07:05,  1.19it/s]

Episode 3211/8000, real env return = -106.93


 40%|██████████████▉                      | 3221/8000 [49:23<1:07:32,  1.18it/s]

Episode 3221/8000, real env return = -102.60


 40%|██████████████▉                      | 3231/8000 [49:32<1:07:46,  1.17it/s]

Episode 3231/8000, real env return = -102.57


 41%|██████████████▉                      | 3241/8000 [49:40<1:10:24,  1.13it/s]

Episode 3241/8000, real env return = -102.46


 41%|███████████████                      | 3251/8000 [49:49<1:08:49,  1.15it/s]

Episode 3251/8000, real env return = -102.83


 41%|███████████████                      | 3261/8000 [49:58<1:09:01,  1.14it/s]

Episode 3261/8000, real env return = -103.02


 41%|███████████████▏                     | 3271/8000 [50:06<1:08:02,  1.16it/s]

Episode 3271/8000, real env return = -103.01


 41%|███████████████▏                     | 3281/8000 [50:15<1:09:16,  1.14it/s]

Episode 3281/8000, real env return = -102.69


 41%|███████████████▏                     | 3291/8000 [50:24<1:07:54,  1.16it/s]

Episode 3291/8000, real env return = -102.14


 41%|███████████████▎                     | 3301/8000 [50:32<1:06:03,  1.19it/s]

Episode 3301/8000, real env return = -105.62


 41%|███████████████▎                     | 3311/8000 [50:41<1:08:21,  1.14it/s]

Episode 3311/8000, real env return = -103.37


 42%|███████████████▎                     | 3321/8000 [50:49<1:09:11,  1.13it/s]

Episode 3321/8000, real env return = -103.71


 42%|███████████████▍                     | 3331/8000 [50:58<1:05:58,  1.18it/s]

Episode 3331/8000, real env return = -103.15


 42%|███████████████▍                     | 3341/8000 [51:07<1:08:44,  1.13it/s]

Episode 3341/8000, real env return = -104.12


 42%|███████████████▍                     | 3351/8000 [51:15<1:05:31,  1.18it/s]

Episode 3351/8000, real env return = -102.67


 42%|███████████████▌                     | 3361/8000 [51:24<1:05:29,  1.18it/s]

Episode 3361/8000, real env return = -103.83


 42%|███████████████▌                     | 3371/8000 [51:32<1:05:37,  1.18it/s]

Episode 3371/8000, real env return = -102.69


 42%|███████████████▋                     | 3381/8000 [51:41<1:05:18,  1.18it/s]

Episode 3381/8000, real env return = -104.02


 42%|███████████████▋                     | 3391/8000 [51:49<1:05:31,  1.17it/s]

Episode 3391/8000, real env return = -102.45


 43%|███████████████▋                     | 3401/8000 [51:58<1:04:38,  1.19it/s]

Episode 3401/8000, real env return = -104.18


 43%|███████████████▊                     | 3411/8000 [52:07<1:08:41,  1.11it/s]

Episode 3411/8000, real env return = -103.12


 43%|███████████████▊                     | 3421/8000 [52:15<1:03:54,  1.19it/s]

Episode 3421/8000, real env return = -102.72


 43%|███████████████▊                     | 3431/8000 [52:24<1:06:27,  1.15it/s]

Episode 3431/8000, real env return = -103.05


 43%|███████████████▉                     | 3441/8000 [52:32<1:04:11,  1.18it/s]

Episode 3441/8000, real env return = -103.04


 43%|███████████████▉                     | 3451/8000 [52:41<1:02:53,  1.21it/s]

Episode 3451/8000, real env return = -103.46


 43%|████████████████                     | 3461/8000 [52:49<1:04:16,  1.18it/s]

Episode 3461/8000, real env return = -102.92


 43%|████████████████                     | 3471/8000 [52:58<1:04:30,  1.17it/s]

Episode 3471/8000, real env return = -103.12


 44%|████████████████                     | 3481/8000 [53:06<1:02:33,  1.20it/s]

Episode 3481/8000, real env return = -102.77


 44%|████████████████▏                    | 3491/8000 [53:15<1:05:35,  1.15it/s]

Episode 3491/8000, real env return = -102.55


 44%|████████████████▏                    | 3501/8000 [53:23<1:03:16,  1.18it/s]

Episode 3501/8000, real env return = -102.76


 44%|████████████████▏                    | 3511/8000 [53:32<1:02:49,  1.19it/s]

Episode 3511/8000, real env return = -102.08


 44%|████████████████▎                    | 3521/8000 [53:41<1:05:36,  1.14it/s]

Episode 3521/8000, real env return = -102.49


 44%|████████████████▎                    | 3531/8000 [53:49<1:03:23,  1.17it/s]

Episode 3531/8000, real env return = -102.84


 44%|████████████████▍                    | 3541/8000 [53:58<1:03:52,  1.16it/s]

Episode 3541/8000, real env return = -102.37


 44%|████████████████▍                    | 3551/8000 [54:06<1:04:01,  1.16it/s]

Episode 3551/8000, real env return = -101.70


 45%|████████████████▍                    | 3561/8000 [54:15<1:02:08,  1.19it/s]

Episode 3561/8000, real env return = -102.08


 45%|████████████████▌                    | 3571/8000 [54:23<1:02:49,  1.17it/s]

Episode 3571/8000, real env return = -102.88


 45%|████████████████▌                    | 3581/8000 [54:32<1:02:37,  1.18it/s]

Episode 3581/8000, real env return = -101.77


 45%|████████████████▌                    | 3591/8000 [54:40<1:01:46,  1.19it/s]

Episode 3591/8000, real env return = -102.45


 45%|████████████████▋                    | 3601/8000 [54:49<1:01:19,  1.20it/s]

Episode 3601/8000, real env return = -102.60


 45%|████████████████▋                    | 3611/8000 [54:57<1:04:25,  1.14it/s]

Episode 3611/8000, real env return = -102.05


 45%|████████████████▋                    | 3621/8000 [55:06<1:02:36,  1.17it/s]

Episode 3621/8000, real env return = -101.77


 45%|████████████████▊                    | 3631/8000 [55:14<1:00:57,  1.19it/s]

Episode 3631/8000, real env return = -102.83


 46%|████████████████▊                    | 3641/8000 [55:23<1:03:31,  1.14it/s]

Episode 3641/8000, real env return = -102.92


 46%|████████████████▉                    | 3651/8000 [55:31<1:02:26,  1.16it/s]

Episode 3651/8000, real env return = -102.41


 46%|████████████████▉                    | 3661/8000 [55:40<1:03:08,  1.15it/s]

Episode 3661/8000, real env return = -102.90


 46%|████████████████▉                    | 3671/8000 [55:48<1:01:19,  1.18it/s]

Episode 3671/8000, real env return = -102.65


 46%|█████████████████                    | 3681/8000 [55:57<1:00:33,  1.19it/s]

Episode 3681/8000, real env return = -102.02


 46%|█████████████████                    | 3691/8000 [56:05<1:00:56,  1.18it/s]

Episode 3691/8000, real env return = -102.40


 46%|█████████████████                    | 3701/8000 [56:14<1:03:22,  1.13it/s]

Episode 3701/8000, real env return = -102.85


 46%|█████████████████▏                   | 3711/8000 [56:23<1:00:43,  1.18it/s]

Episode 3711/8000, real env return = -102.09


 47%|█████████████████▏                   | 3721/8000 [56:31<1:02:33,  1.14it/s]

Episode 3721/8000, real env return = -102.41


 47%|██████████████████▏                    | 3731/8000 [56:40<59:34,  1.19it/s]

Episode 3731/8000, real env return = -102.20


 47%|██████████████████▏                    | 3741/8000 [56:48<58:48,  1.21it/s]

Episode 3741/8000, real env return = -102.51


 47%|█████████████████▎                   | 3751/8000 [56:57<1:00:44,  1.17it/s]

Episode 3751/8000, real env return = -102.09


 47%|██████████████████▎                    | 3761/8000 [57:05<59:08,  1.19it/s]

Episode 3761/8000, real env return = -102.31


 47%|██████████████████▍                    | 3771/8000 [57:13<58:53,  1.20it/s]

Episode 3771/8000, real env return = -102.51


 47%|██████████████████▍                    | 3781/8000 [57:22<59:30,  1.18it/s]

Episode 3781/8000, real env return = -102.21


 47%|█████████████████▌                   | 3791/8000 [57:31<1:01:27,  1.14it/s]

Episode 3791/8000, real env return = -102.55


 48%|█████████████████▌                   | 3801/8000 [57:39<1:01:42,  1.13it/s]

Episode 3801/8000, real env return = -102.43


 48%|██████████████████▌                    | 3811/8000 [57:48<59:36,  1.17it/s]

Episode 3811/8000, real env return = -102.38


 48%|█████████████████▋                   | 3821/8000 [57:57<1:04:26,  1.08it/s]

Episode 3821/8000, real env return = -103.04


 48%|██████████████████▋                    | 3831/8000 [58:05<59:26,  1.17it/s]

Episode 3831/8000, real env return = -102.80


 48%|██████████████████▋                    | 3841/8000 [58:14<58:05,  1.19it/s]

Episode 3841/8000, real env return = -102.21


 48%|██████████████████▊                    | 3851/8000 [58:22<58:27,  1.18it/s]

Episode 3851/8000, real env return = -102.43


 48%|██████████████████▊                    | 3861/8000 [58:31<59:36,  1.16it/s]

Episode 3861/8000, real env return = -102.69


 48%|██████████████████▊                    | 3871/8000 [58:40<59:17,  1.16it/s]

Episode 3871/8000, real env return = -102.58


 49%|██████████████████▉                    | 3881/8000 [58:48<59:14,  1.16it/s]

Episode 3881/8000, real env return = -102.98


 49%|██████████████████▉                    | 3891/8000 [58:57<56:45,  1.21it/s]

Episode 3891/8000, real env return = -102.25


 49%|███████████████████                    | 3901/8000 [59:05<58:29,  1.17it/s]

Episode 3901/8000, real env return = -102.64


 49%|███████████████████                    | 3911/8000 [59:14<59:54,  1.14it/s]

Episode 3911/8000, real env return = -102.67


 49%|███████████████████                    | 3921/8000 [59:23<57:21,  1.19it/s]

Episode 3921/8000, real env return = -102.62


 49%|███████████████████▏                   | 3931/8000 [59:31<58:25,  1.16it/s]

Episode 3931/8000, real env return = -102.98


 49%|███████████████████▏                   | 3941/8000 [59:40<57:28,  1.18it/s]

Episode 3941/8000, real env return = -104.49


 49%|███████████████████▎                   | 3951/8000 [59:49<57:42,  1.17it/s]

Episode 3951/8000, real env return = -102.85


 50%|███████████████████▎                   | 3961/8000 [59:57<56:26,  1.19it/s]

Episode 3961/8000, real env return = -102.91


 50%|██████████████████▎                  | 3971/8000 [1:00:06<58:08,  1.16it/s]

Episode 3971/8000, real env return = -101.64


 50%|██████████████████▍                  | 3981/8000 [1:00:14<55:31,  1.21it/s]

Episode 3981/8000, real env return = -102.42


 50%|██████████████████▍                  | 3991/8000 [1:00:23<56:18,  1.19it/s]

Episode 3991/8000, real env return = -102.46


 50%|██████████████████▌                  | 4001/8000 [1:00:31<55:54,  1.19it/s]

Episode 4001/8000, real env return = -102.50


 50%|██████████████████▌                  | 4011/8000 [1:00:40<58:48,  1.13it/s]

Episode 4011/8000, real env return = -103.13


 50%|██████████████████▌                  | 4021/8000 [1:00:48<55:38,  1.19it/s]

Episode 4021/8000, real env return = -102.92


 50%|██████████████████▋                  | 4031/8000 [1:00:57<58:40,  1.13it/s]

Episode 4031/8000, real env return = -103.51


 51%|██████████████████▋                  | 4041/8000 [1:01:05<54:31,  1.21it/s]

Episode 4041/8000, real env return = -102.12


 51%|██████████████████▋                  | 4051/8000 [1:01:14<55:28,  1.19it/s]

Episode 4051/8000, real env return = -102.58


 51%|██████████████████▊                  | 4061/8000 [1:01:23<55:19,  1.19it/s]

Episode 4061/8000, real env return = -102.42


 51%|██████████████████▊                  | 4071/8000 [1:01:31<56:12,  1.16it/s]

Episode 4071/8000, real env return = -102.64


 51%|██████████████████▊                  | 4081/8000 [1:01:39<54:45,  1.19it/s]

Episode 4081/8000, real env return = -102.73


 51%|██████████████████▉                  | 4091/8000 [1:01:48<54:09,  1.20it/s]

Episode 4091/8000, real env return = -107.45


 51%|██████████████████▉                  | 4101/8000 [1:01:57<55:19,  1.17it/s]

Episode 4101/8000, real env return = -107.98


 51%|███████████████████                  | 4111/8000 [1:02:05<56:55,  1.14it/s]

Episode 4111/8000, real env return = -107.84


 52%|███████████████████                  | 4121/8000 [1:02:14<55:30,  1.16it/s]

Episode 4121/8000, real env return = -107.30


 52%|███████████████████                  | 4131/8000 [1:02:22<55:06,  1.17it/s]

Episode 4131/8000, real env return = -107.06


 52%|███████████████████▏                 | 4141/8000 [1:02:31<55:00,  1.17it/s]

Episode 4141/8000, real env return = -106.12


 52%|███████████████████▏                 | 4151/8000 [1:02:39<55:25,  1.16it/s]

Episode 4151/8000, real env return = -107.07


 52%|███████████████████▏                 | 4161/8000 [1:02:48<55:41,  1.15it/s]

Episode 4161/8000, real env return = -99.26


 52%|███████████████████▎                 | 4171/8000 [1:02:57<58:02,  1.10it/s]

Episode 4171/8000, real env return = -99.98


 52%|███████████████████▎                 | 4181/8000 [1:03:06<55:33,  1.15it/s]

Episode 4181/8000, real env return = -102.69


 52%|███████████████████▍                 | 4191/8000 [1:03:15<56:00,  1.13it/s]

Episode 4191/8000, real env return = -104.66


 53%|███████████████████▍                 | 4201/8000 [1:03:23<56:03,  1.13it/s]

Episode 4201/8000, real env return = -103.09


 53%|███████████████████▍                 | 4211/8000 [1:03:32<54:42,  1.15it/s]

Episode 4211/8000, real env return = -102.58


 53%|███████████████████▌                 | 4221/8000 [1:03:41<54:35,  1.15it/s]

Episode 4221/8000, real env return = -101.72


 53%|███████████████████▌                 | 4231/8000 [1:03:50<54:57,  1.14it/s]

Episode 4231/8000, real env return = -102.78


 53%|███████████████████▌                 | 4241/8000 [1:03:59<53:46,  1.16it/s]

Episode 4241/8000, real env return = -102.05


 53%|███████████████████▋                 | 4251/8000 [1:04:07<54:39,  1.14it/s]

Episode 4251/8000, real env return = -102.83


 53%|███████████████████▋                 | 4261/8000 [1:04:16<54:51,  1.14it/s]

Episode 4261/8000, real env return = -103.24


 53%|███████████████████▊                 | 4271/8000 [1:04:25<55:07,  1.13it/s]

Episode 4271/8000, real env return = -103.08


 54%|███████████████████▊                 | 4281/8000 [1:04:34<54:54,  1.13it/s]

Episode 4281/8000, real env return = -102.33


 54%|███████████████████▊                 | 4291/8000 [1:04:42<53:38,  1.15it/s]

Episode 4291/8000, real env return = -101.90


 54%|███████████████████▉                 | 4301/8000 [1:04:51<53:16,  1.16it/s]

Episode 4301/8000, real env return = -102.86


 54%|███████████████████▉                 | 4311/8000 [1:05:00<53:19,  1.15it/s]

Episode 4311/8000, real env return = -102.18


 54%|███████████████████▉                 | 4321/8000 [1:05:09<53:36,  1.14it/s]

Episode 4321/8000, real env return = -101.46


 54%|████████████████████                 | 4331/8000 [1:05:17<53:45,  1.14it/s]

Episode 4331/8000, real env return = -102.88


 54%|████████████████████                 | 4341/8000 [1:05:26<54:25,  1.12it/s]

Episode 4341/8000, real env return = -103.03


 54%|████████████████████                 | 4351/8000 [1:05:35<54:03,  1.12it/s]

Episode 4351/8000, real env return = -103.18


 55%|████████████████████▏                | 4361/8000 [1:05:44<51:02,  1.19it/s]

Episode 4361/8000, real env return = -103.13


 55%|████████████████████▏                | 4371/8000 [1:05:52<50:37,  1.19it/s]

Episode 4371/8000, real env return = -101.74


 55%|████████████████████▎                | 4381/8000 [1:06:01<50:18,  1.20it/s]

Episode 4381/8000, real env return = -103.00


 55%|████████████████████▎                | 4391/8000 [1:06:09<50:53,  1.18it/s]

Episode 4391/8000, real env return = -102.50


 55%|████████████████████▎                | 4401/8000 [1:06:18<50:44,  1.18it/s]

Episode 4401/8000, real env return = -102.18


 55%|████████████████████▍                | 4411/8000 [1:06:26<52:25,  1.14it/s]

Episode 4411/8000, real env return = -102.93


 55%|████████████████████▍                | 4421/8000 [1:06:35<50:30,  1.18it/s]

Episode 4421/8000, real env return = -103.21


 55%|████████████████████▍                | 4431/8000 [1:06:44<51:44,  1.15it/s]

Episode 4431/8000, real env return = -103.70


 56%|████████████████████▌                | 4441/8000 [1:06:52<51:38,  1.15it/s]

Episode 4441/8000, real env return = -102.54


 56%|████████████████████▌                | 4451/8000 [1:07:01<50:22,  1.17it/s]

Episode 4451/8000, real env return = -103.22


 56%|████████████████████▋                | 4461/8000 [1:07:09<49:10,  1.20it/s]

Episode 4461/8000, real env return = -102.03


 56%|████████████████████▋                | 4471/8000 [1:07:18<50:46,  1.16it/s]

Episode 4471/8000, real env return = -103.50


 56%|████████████████████▋                | 4481/8000 [1:07:27<55:25,  1.06it/s]

Episode 4481/8000, real env return = -103.11


 56%|████████████████████▊                | 4491/8000 [1:07:35<49:38,  1.18it/s]

Episode 4491/8000, real env return = -103.03


 56%|████████████████████▊                | 4501/8000 [1:07:44<51:05,  1.14it/s]

Episode 4501/8000, real env return = -103.02


 56%|████████████████████▊                | 4511/8000 [1:07:53<50:58,  1.14it/s]

Episode 4511/8000, real env return = -103.50


 57%|████████████████████▉                | 4521/8000 [1:08:01<50:38,  1.14it/s]

Episode 4521/8000, real env return = -102.53


 57%|████████████████████▉                | 4531/8000 [1:08:10<49:23,  1.17it/s]

Episode 4531/8000, real env return = -103.74


 57%|█████████████████████                | 4541/8000 [1:08:19<49:44,  1.16it/s]

Episode 4541/8000, real env return = -102.51


 57%|█████████████████████                | 4551/8000 [1:08:27<49:17,  1.17it/s]

Episode 4551/8000, real env return = -102.53


 57%|█████████████████████                | 4561/8000 [1:08:36<48:18,  1.19it/s]

Episode 4561/8000, real env return = -103.03


 57%|█████████████████████▏               | 4571/8000 [1:08:44<49:46,  1.15it/s]

Episode 4571/8000, real env return = -103.28


 57%|█████████████████████▏               | 4581/8000 [1:08:53<47:51,  1.19it/s]

Episode 4581/8000, real env return = -102.37


 57%|█████████████████████▏               | 4591/8000 [1:09:01<47:00,  1.21it/s]

Episode 4591/8000, real env return = -103.15


 58%|█████████████████████▎               | 4601/8000 [1:09:10<50:15,  1.13it/s]

Episode 4601/8000, real env return = -103.90


 58%|█████████████████████▎               | 4611/8000 [1:09:18<48:10,  1.17it/s]

Episode 4611/8000, real env return = -102.96


 58%|█████████████████████▎               | 4621/8000 [1:09:27<48:54,  1.15it/s]

Episode 4621/8000, real env return = -102.55


 58%|█████████████████████▍               | 4631/8000 [1:09:36<50:40,  1.11it/s]

Episode 4631/8000, real env return = -102.00


 58%|█████████████████████▍               | 4641/8000 [1:09:44<47:29,  1.18it/s]

Episode 4641/8000, real env return = -103.13


 58%|█████████████████████▌               | 4651/8000 [1:09:53<49:12,  1.13it/s]

Episode 4651/8000, real env return = -103.03


 58%|█████████████████████▌               | 4661/8000 [1:10:01<48:04,  1.16it/s]

Episode 4661/8000, real env return = -103.06


 58%|█████████████████████▌               | 4671/8000 [1:10:10<47:33,  1.17it/s]

Episode 4671/8000, real env return = -102.10


 59%|█████████████████████▋               | 4681/8000 [1:10:18<48:00,  1.15it/s]

Episode 4681/8000, real env return = -102.41


 59%|█████████████████████▋               | 4691/8000 [1:10:27<46:16,  1.19it/s]

Episode 4691/8000, real env return = -101.99


 59%|█████████████████████▋               | 4701/8000 [1:10:36<46:26,  1.18it/s]

Episode 4701/8000, real env return = -102.77


 59%|█████████████████████▊               | 4711/8000 [1:10:44<47:37,  1.15it/s]

Episode 4711/8000, real env return = -102.69


 59%|█████████████████████▊               | 4721/8000 [1:10:53<46:11,  1.18it/s]

Episode 4721/8000, real env return = -103.14


 59%|█████████████████████▉               | 4731/8000 [1:11:01<48:32,  1.12it/s]

Episode 4731/8000, real env return = -110.93


 59%|█████████████████████▉               | 4741/8000 [1:11:10<46:44,  1.16it/s]

Episode 4741/8000, real env return = -107.27


 59%|█████████████████████▉               | 4751/8000 [1:11:18<45:45,  1.18it/s]

Episode 4751/8000, real env return = -117.64


 60%|██████████████████████               | 4761/8000 [1:11:27<44:39,  1.21it/s]

Episode 4761/8000, real env return = -110.11


 60%|██████████████████████               | 4771/8000 [1:11:35<45:02,  1.19it/s]

Episode 4771/8000, real env return = -106.76


 60%|██████████████████████               | 4781/8000 [1:11:44<45:05,  1.19it/s]

Episode 4781/8000, real env return = -107.02


 60%|██████████████████████▏              | 4791/8000 [1:11:52<46:16,  1.16it/s]

Episode 4791/8000, real env return = -111.66


 60%|██████████████████████▏              | 4801/8000 [1:12:01<45:03,  1.18it/s]

Episode 4801/8000, real env return = -102.40


 60%|██████████████████████▎              | 4811/8000 [1:12:10<46:19,  1.15it/s]

Episode 4811/8000, real env return = -102.82


 60%|██████████████████████▎              | 4821/8000 [1:12:18<46:29,  1.14it/s]

Episode 4821/8000, real env return = -102.26


 60%|██████████████████████▎              | 4831/8000 [1:12:27<45:13,  1.17it/s]

Episode 4831/8000, real env return = -102.65


 61%|██████████████████████▍              | 4841/8000 [1:12:35<44:11,  1.19it/s]

Episode 4841/8000, real env return = -101.77


 61%|██████████████████████▍              | 4851/8000 [1:12:44<45:45,  1.15it/s]

Episode 4851/8000, real env return = -102.82


 61%|██████████████████████▍              | 4861/8000 [1:12:52<44:31,  1.17it/s]

Episode 4861/8000, real env return = -102.82


 61%|██████████████████████▌              | 4871/8000 [1:13:01<43:39,  1.19it/s]

Episode 4871/8000, real env return = -102.09


 61%|██████████████████████▌              | 4881/8000 [1:13:09<46:05,  1.13it/s]

Episode 4881/8000, real env return = -102.97


 61%|██████████████████████▌              | 4891/8000 [1:13:18<44:20,  1.17it/s]

Episode 4891/8000, real env return = -102.61


 61%|██████████████████████▋              | 4901/8000 [1:13:27<43:41,  1.18it/s]

Episode 4901/8000, real env return = -102.72


 61%|██████████████████████▋              | 4911/8000 [1:13:35<44:50,  1.15it/s]

Episode 4911/8000, real env return = -103.35


 62%|██████████████████████▊              | 4921/8000 [1:13:44<44:25,  1.16it/s]

Episode 4921/8000, real env return = -102.47


 62%|██████████████████████▊              | 4931/8000 [1:13:53<42:37,  1.20it/s]

Episode 4931/8000, real env return = -102.58


 62%|██████████████████████▊              | 4941/8000 [1:14:01<42:53,  1.19it/s]

Episode 4941/8000, real env return = -103.31


 62%|██████████████████████▉              | 4951/8000 [1:14:10<42:24,  1.20it/s]

Episode 4951/8000, real env return = -103.35


 62%|██████████████████████▉              | 4961/8000 [1:14:18<44:34,  1.14it/s]

Episode 4961/8000, real env return = -102.97


 62%|██████████████████████▉              | 4971/8000 [1:14:27<43:11,  1.17it/s]

Episode 4971/8000, real env return = -103.05


 62%|███████████████████████              | 4981/8000 [1:14:35<43:52,  1.15it/s]

Episode 4981/8000, real env return = -103.05


 62%|███████████████████████              | 4991/8000 [1:14:44<43:42,  1.15it/s]

Episode 4991/8000, real env return = -103.28


 63%|███████████████████████▏             | 5001/8000 [1:14:52<42:28,  1.18it/s]

Episode 5001/8000, real env return = -103.31


 63%|███████████████████████▏             | 5011/8000 [1:15:01<43:25,  1.15it/s]

Episode 5011/8000, real env return = -103.15


 63%|███████████████████████▏             | 5021/8000 [1:15:09<42:00,  1.18it/s]

Episode 5021/8000, real env return = -103.41


 63%|███████████████████████▎             | 5031/8000 [1:15:18<43:32,  1.14it/s]

Episode 5031/8000, real env return = -103.42


 63%|███████████████████████▎             | 5041/8000 [1:15:27<43:36,  1.13it/s]

Episode 5041/8000, real env return = -103.61


 63%|███████████████████████▎             | 5051/8000 [1:15:36<43:50,  1.12it/s]

Episode 5051/8000, real env return = -106.68


 63%|███████████████████████▍             | 5061/8000 [1:15:44<42:04,  1.16it/s]

Episode 5061/8000, real env return = -106.80


 63%|███████████████████████▍             | 5071/8000 [1:15:53<43:21,  1.13it/s]

Episode 5071/8000, real env return = -110.63


 64%|███████████████████████▍             | 5081/8000 [1:16:02<40:49,  1.19it/s]

Episode 5081/8000, real env return = -107.89


 64%|███████████████████████▌             | 5091/8000 [1:16:10<42:46,  1.13it/s]

Episode 5091/8000, real env return = -108.17


 64%|███████████████████████▌             | 5101/8000 [1:16:19<40:55,  1.18it/s]

Episode 5101/8000, real env return = -107.58


 64%|███████████████████████▋             | 5111/8000 [1:16:28<41:59,  1.15it/s]

Episode 5111/8000, real env return = -107.91


 64%|███████████████████████▋             | 5121/8000 [1:16:36<40:11,  1.19it/s]

Episode 5121/8000, real env return = -107.07


 64%|███████████████████████▋             | 5131/8000 [1:16:44<39:56,  1.20it/s]

Episode 5131/8000, real env return = -107.62


 64%|███████████████████████▊             | 5141/8000 [1:16:53<40:05,  1.19it/s]

Episode 5141/8000, real env return = -106.41


 64%|███████████████████████▊             | 5151/8000 [1:17:02<39:42,  1.20it/s]

Episode 5151/8000, real env return = -107.83


 65%|███████████████████████▊             | 5161/8000 [1:17:10<40:47,  1.16it/s]

Episode 5161/8000, real env return = -107.87


 65%|███████████████████████▉             | 5171/8000 [1:17:18<39:03,  1.21it/s]

Episode 5171/8000, real env return = -107.00


 65%|███████████████████████▉             | 5181/8000 [1:17:27<38:44,  1.21it/s]

Episode 5181/8000, real env return = -106.80


 65%|████████████████████████             | 5191/8000 [1:17:35<39:20,  1.19it/s]

Episode 5191/8000, real env return = -106.84


 65%|████████████████████████             | 5201/8000 [1:17:44<39:22,  1.18it/s]

Episode 5201/8000, real env return = -107.83


 65%|████████████████████████             | 5211/8000 [1:17:52<39:46,  1.17it/s]

Episode 5211/8000, real env return = -107.61


 65%|████████████████████████▏            | 5221/8000 [1:18:01<40:03,  1.16it/s]

Episode 5221/8000, real env return = -107.13


 65%|████████████████████████▏            | 5231/8000 [1:18:10<40:24,  1.14it/s]

Episode 5231/8000, real env return = -106.95


 66%|████████████████████████▏            | 5241/8000 [1:18:18<38:21,  1.20it/s]

Episode 5241/8000, real env return = -107.61


 66%|████████████████████████▎            | 5251/8000 [1:18:26<39:53,  1.15it/s]

Episode 5251/8000, real env return = -106.84


 66%|████████████████████████▎            | 5261/8000 [1:18:35<39:20,  1.16it/s]

Episode 5261/8000, real env return = -106.83


 66%|████████████████████████▍            | 5271/8000 [1:18:43<39:14,  1.16it/s]

Episode 5271/8000, real env return = -107.09


 66%|████████████████████████▍            | 5281/8000 [1:18:52<37:58,  1.19it/s]

Episode 5281/8000, real env return = -107.09


 66%|████████████████████████▍            | 5291/8000 [1:19:01<37:59,  1.19it/s]

Episode 5291/8000, real env return = -106.85


 66%|████████████████████████▌            | 5301/8000 [1:19:09<37:21,  1.20it/s]

Episode 5301/8000, real env return = -107.80


 66%|████████████████████████▌            | 5311/8000 [1:19:18<38:16,  1.17it/s]

Episode 5311/8000, real env return = -107.55


 67%|████████████████████████▌            | 5321/8000 [1:19:26<37:24,  1.19it/s]

Episode 5321/8000, real env return = -107.66


 67%|████████████████████████▋            | 5331/8000 [1:19:34<37:33,  1.18it/s]

Episode 5331/8000, real env return = -107.64


 67%|████████████████████████▋            | 5341/8000 [1:19:43<36:45,  1.21it/s]

Episode 5341/8000, real env return = -107.47


 67%|████████████████████████▋            | 5351/8000 [1:19:51<37:02,  1.19it/s]

Episode 5351/8000, real env return = -107.67


 67%|████████████████████████▊            | 5361/8000 [1:20:00<37:06,  1.19it/s]

Episode 5361/8000, real env return = -107.96


 67%|████████████████████████▊            | 5371/8000 [1:20:08<37:12,  1.18it/s]

Episode 5371/8000, real env return = -110.24


 67%|████████████████████████▉            | 5381/8000 [1:20:17<37:14,  1.17it/s]

Episode 5381/8000, real env return = -108.65


 67%|████████████████████████▉            | 5391/8000 [1:20:26<36:30,  1.19it/s]

Episode 5391/8000, real env return = -108.95


 68%|████████████████████████▉            | 5401/8000 [1:20:34<37:27,  1.16it/s]

Episode 5401/8000, real env return = -108.56


 68%|█████████████████████████            | 5411/8000 [1:20:43<35:30,  1.22it/s]

Episode 5411/8000, real env return = -108.78


 68%|█████████████████████████            | 5421/8000 [1:20:51<36:50,  1.17it/s]

Episode 5421/8000, real env return = -107.78


 68%|█████████████████████████            | 5431/8000 [1:21:00<37:08,  1.15it/s]

Episode 5431/8000, real env return = -107.43


 68%|█████████████████████████▏           | 5441/8000 [1:21:08<36:32,  1.17it/s]

Episode 5441/8000, real env return = -108.41


 68%|█████████████████████████▏           | 5451/8000 [1:21:17<35:25,  1.20it/s]

Episode 5451/8000, real env return = -107.61


 68%|█████████████████████████▎           | 5461/8000 [1:21:25<36:53,  1.15it/s]

Episode 5461/8000, real env return = -108.91


 68%|█████████████████████████▎           | 5471/8000 [1:21:33<35:11,  1.20it/s]

Episode 5471/8000, real env return = -106.70


 69%|█████████████████████████▎           | 5481/8000 [1:21:42<35:10,  1.19it/s]

Episode 5481/8000, real env return = -108.94


 69%|█████████████████████████▍           | 5491/8000 [1:21:50<34:45,  1.20it/s]

Episode 5491/8000, real env return = -106.48


 69%|█████████████████████████▍           | 5501/8000 [1:21:59<35:24,  1.18it/s]

Episode 5501/8000, real env return = -107.26


 69%|█████████████████████████▍           | 5511/8000 [1:22:07<35:22,  1.17it/s]

Episode 5511/8000, real env return = -107.36


 69%|█████████████████████████▌           | 5521/8000 [1:22:16<34:32,  1.20it/s]

Episode 5521/8000, real env return = -107.48


 69%|█████████████████████████▌           | 5531/8000 [1:22:24<34:35,  1.19it/s]

Episode 5531/8000, real env return = -108.82


 69%|█████████████████████████▋           | 5541/8000 [1:22:33<34:30,  1.19it/s]

Episode 5541/8000, real env return = -107.16


 69%|█████████████████████████▋           | 5551/8000 [1:22:41<34:45,  1.17it/s]

Episode 5551/8000, real env return = -108.01


 70%|█████████████████████████▋           | 5561/8000 [1:22:50<35:13,  1.15it/s]

Episode 5561/8000, real env return = -107.41


 70%|█████████████████████████▊           | 5571/8000 [1:22:58<34:15,  1.18it/s]

Episode 5571/8000, real env return = -107.49


 70%|█████████████████████████▊           | 5581/8000 [1:23:07<34:52,  1.16it/s]

Episode 5581/8000, real env return = -108.55


 70%|█████████████████████████▊           | 5591/8000 [1:23:16<34:15,  1.17it/s]

Episode 5591/8000, real env return = -107.70


 70%|█████████████████████████▉           | 5601/8000 [1:23:24<34:35,  1.16it/s]

Episode 5601/8000, real env return = -110.65


 70%|█████████████████████████▉           | 5611/8000 [1:23:33<33:55,  1.17it/s]

Episode 5611/8000, real env return = -107.14


 70%|█████████████████████████▉           | 5621/8000 [1:23:41<32:59,  1.20it/s]

Episode 5621/8000, real env return = -107.49


 70%|██████████████████████████           | 5631/8000 [1:23:50<34:30,  1.14it/s]

Episode 5631/8000, real env return = -107.44


 71%|██████████████████████████           | 5641/8000 [1:23:58<33:09,  1.19it/s]

Episode 5641/8000, real env return = -108.25


 71%|██████████████████████████▏          | 5651/8000 [1:24:07<34:28,  1.14it/s]

Episode 5651/8000, real env return = -107.51


 71%|██████████████████████████▏          | 5661/8000 [1:24:15<32:42,  1.19it/s]

Episode 5661/8000, real env return = -107.18


 71%|██████████████████████████▏          | 5671/8000 [1:24:24<33:26,  1.16it/s]

Episode 5671/8000, real env return = -106.46


 71%|██████████████████████████▎          | 5681/8000 [1:24:33<33:46,  1.14it/s]

Episode 5681/8000, real env return = -107.00


 71%|██████████████████████████▎          | 5691/8000 [1:24:41<31:12,  1.23it/s]

Episode 5691/8000, real env return = -106.38


 71%|██████████████████████████▎          | 5701/8000 [1:24:50<33:25,  1.15it/s]

Episode 5701/8000, real env return = -106.32


 71%|██████████████████████████▍          | 5711/8000 [1:24:58<32:13,  1.18it/s]

Episode 5711/8000, real env return = -107.22


 72%|██████████████████████████▍          | 5721/8000 [1:25:07<32:01,  1.19it/s]

Episode 5721/8000, real env return = -106.85


 72%|██████████████████████████▌          | 5731/8000 [1:25:16<33:11,  1.14it/s]

Episode 5731/8000, real env return = -106.72


 72%|██████████████████████████▌          | 5741/8000 [1:25:24<31:44,  1.19it/s]

Episode 5741/8000, real env return = -107.29


 72%|██████████████████████████▌          | 5751/8000 [1:25:33<32:45,  1.14it/s]

Episode 5751/8000, real env return = -106.95


 72%|██████████████████████████▋          | 5761/8000 [1:25:41<31:51,  1.17it/s]

Episode 5761/8000, real env return = -107.07


 72%|██████████████████████████▋          | 5771/8000 [1:25:50<31:30,  1.18it/s]

Episode 5771/8000, real env return = -106.39


 72%|██████████████████████████▋          | 5781/8000 [1:25:58<30:43,  1.20it/s]

Episode 5781/8000, real env return = -106.36


 72%|██████████████████████████▊          | 5791/8000 [1:26:07<30:36,  1.20it/s]

Episode 5791/8000, real env return = -107.26


 73%|██████████████████████████▊          | 5801/8000 [1:26:15<31:02,  1.18it/s]

Episode 5801/8000, real env return = -107.13


 73%|██████████████████████████▉          | 5811/8000 [1:26:24<30:08,  1.21it/s]

Episode 5811/8000, real env return = -106.14


 73%|██████████████████████████▉          | 5821/8000 [1:26:32<29:56,  1.21it/s]

Episode 5821/8000, real env return = -106.65


 73%|██████████████████████████▉          | 5831/8000 [1:26:41<30:07,  1.20it/s]

Episode 5831/8000, real env return = -106.48


 73%|███████████████████████████          | 5841/8000 [1:26:49<31:15,  1.15it/s]

Episode 5841/8000, real env return = -107.67


 73%|███████████████████████████          | 5851/8000 [1:26:58<30:16,  1.18it/s]

Episode 5851/8000, real env return = -107.72


 73%|███████████████████████████          | 5861/8000 [1:27:06<30:23,  1.17it/s]

Episode 5861/8000, real env return = -108.03


 73%|███████████████████████████▏         | 5871/8000 [1:27:15<31:00,  1.14it/s]

Episode 5871/8000, real env return = -107.10


 74%|███████████████████████████▏         | 5881/8000 [1:27:23<29:35,  1.19it/s]

Episode 5881/8000, real env return = -107.63


 74%|███████████████████████████▏         | 5891/8000 [1:27:32<29:24,  1.20it/s]

Episode 5891/8000, real env return = -107.06


 74%|███████████████████████████▎         | 5901/8000 [1:27:40<28:54,  1.21it/s]

Episode 5901/8000, real env return = -107.05


 74%|███████████████████████████▎         | 5911/8000 [1:27:49<29:35,  1.18it/s]

Episode 5911/8000, real env return = -107.48


 74%|███████████████████████████▍         | 5921/8000 [1:27:57<30:17,  1.14it/s]

Episode 5921/8000, real env return = -106.73


 74%|███████████████████████████▍         | 5931/8000 [1:28:06<28:48,  1.20it/s]

Episode 5931/8000, real env return = -104.28


 74%|███████████████████████████▍         | 5941/8000 [1:28:14<29:15,  1.17it/s]

Episode 5941/8000, real env return = -103.94


 74%|███████████████████████████▌         | 5951/8000 [1:28:23<30:10,  1.13it/s]

Episode 5951/8000, real env return = -105.05


 75%|███████████████████████████▌         | 5961/8000 [1:28:31<29:45,  1.14it/s]

Episode 5961/8000, real env return = -105.04


 75%|███████████████████████████▌         | 5971/8000 [1:28:40<28:49,  1.17it/s]

Episode 5971/8000, real env return = -105.06


 75%|███████████████████████████▋         | 5981/8000 [1:28:49<28:08,  1.20it/s]

Episode 5981/8000, real env return = -104.99


 75%|███████████████████████████▋         | 5991/8000 [1:28:57<27:40,  1.21it/s]

Episode 5991/8000, real env return = -105.35


 75%|███████████████████████████▊         | 6001/8000 [1:29:06<28:47,  1.16it/s]

Episode 6001/8000, real env return = -105.26


 75%|███████████████████████████▊         | 6011/8000 [1:29:14<28:15,  1.17it/s]

Episode 6011/8000, real env return = -105.35


 75%|███████████████████████████▊         | 6021/8000 [1:29:23<27:49,  1.19it/s]

Episode 6021/8000, real env return = -105.05


 75%|███████████████████████████▉         | 6031/8000 [1:29:31<27:33,  1.19it/s]

Episode 6031/8000, real env return = -104.88


 76%|███████████████████████████▉         | 6041/8000 [1:29:40<27:42,  1.18it/s]

Episode 6041/8000, real env return = -105.29


 76%|███████████████████████████▉         | 6051/8000 [1:29:48<27:01,  1.20it/s]

Episode 6051/8000, real env return = -104.79


 76%|████████████████████████████         | 6061/8000 [1:29:57<27:12,  1.19it/s]

Episode 6061/8000, real env return = -105.05


 76%|████████████████████████████         | 6071/8000 [1:30:05<26:30,  1.21it/s]

Episode 6071/8000, real env return = -104.72


 76%|████████████████████████████         | 6081/8000 [1:30:14<28:01,  1.14it/s]

Episode 6081/8000, real env return = -105.19


 76%|████████████████████████████▏        | 6091/8000 [1:30:22<27:43,  1.15it/s]

Episode 6091/8000, real env return = -104.67


 76%|████████████████████████████▏        | 6101/8000 [1:30:31<28:13,  1.12it/s]

Episode 6101/8000, real env return = -104.34


 76%|████████████████████████████▎        | 6111/8000 [1:30:40<26:50,  1.17it/s]

Episode 6111/8000, real env return = -104.77


 77%|████████████████████████████▎        | 6121/8000 [1:30:48<27:26,  1.14it/s]

Episode 6121/8000, real env return = -104.52


 77%|████████████████████████████▎        | 6131/8000 [1:30:57<26:03,  1.20it/s]

Episode 6131/8000, real env return = -104.75


 77%|████████████████████████████▍        | 6141/8000 [1:31:05<26:42,  1.16it/s]

Episode 6141/8000, real env return = -104.19


 77%|████████████████████████████▍        | 6151/8000 [1:31:14<25:40,  1.20it/s]

Episode 6151/8000, real env return = -104.88


 77%|████████████████████████████▍        | 6161/8000 [1:31:22<25:31,  1.20it/s]

Episode 6161/8000, real env return = -103.89


 77%|████████████████████████████▌        | 6171/8000 [1:31:31<26:47,  1.14it/s]

Episode 6171/8000, real env return = -103.83


 77%|████████████████████████████▌        | 6181/8000 [1:31:39<25:18,  1.20it/s]

Episode 6181/8000, real env return = -103.69


 77%|████████████████████████████▋        | 6191/8000 [1:31:48<24:29,  1.23it/s]

Episode 6191/8000, real env return = -104.32


 78%|████████████████████████████▋        | 6201/8000 [1:31:56<25:11,  1.19it/s]

Episode 6201/8000, real env return = -104.56


 78%|████████████████████████████▋        | 6211/8000 [1:32:05<25:16,  1.18it/s]

Episode 6211/8000, real env return = -105.16


 78%|████████████████████████████▊        | 6221/8000 [1:32:13<25:09,  1.18it/s]

Episode 6221/8000, real env return = -104.95


 78%|████████████████████████████▊        | 6231/8000 [1:32:22<24:32,  1.20it/s]

Episode 6231/8000, real env return = -103.81


 78%|████████████████████████████▊        | 6241/8000 [1:32:30<24:38,  1.19it/s]

Episode 6241/8000, real env return = -104.88


 78%|████████████████████████████▉        | 6251/8000 [1:32:39<24:55,  1.17it/s]

Episode 6251/8000, real env return = -104.72


 78%|████████████████████████████▉        | 6261/8000 [1:32:47<24:09,  1.20it/s]

Episode 6261/8000, real env return = -104.96


 78%|█████████████████████████████        | 6271/8000 [1:32:56<25:13,  1.14it/s]

Episode 6271/8000, real env return = -104.58


 79%|█████████████████████████████        | 6281/8000 [1:33:04<23:48,  1.20it/s]

Episode 6281/8000, real env return = -104.67


 79%|█████████████████████████████        | 6291/8000 [1:33:13<23:42,  1.20it/s]

Episode 6291/8000, real env return = -104.83


 79%|█████████████████████████████▏       | 6301/8000 [1:33:21<23:58,  1.18it/s]

Episode 6301/8000, real env return = -105.00


 79%|█████████████████████████████▏       | 6311/8000 [1:33:30<24:14,  1.16it/s]

Episode 6311/8000, real env return = -105.09


 79%|█████████████████████████████▏       | 6321/8000 [1:33:38<23:11,  1.21it/s]

Episode 6321/8000, real env return = -104.49


 79%|█████████████████████████████▎       | 6331/8000 [1:33:47<24:16,  1.15it/s]

Episode 6331/8000, real env return = -104.60


 79%|█████████████████████████████▎       | 6341/8000 [1:33:55<23:25,  1.18it/s]

Episode 6341/8000, real env return = -104.50


 79%|█████████████████████████████▎       | 6351/8000 [1:34:03<23:08,  1.19it/s]

Episode 6351/8000, real env return = -104.45


 80%|█████████████████████████████▍       | 6361/8000 [1:34:12<23:43,  1.15it/s]

Episode 6361/8000, real env return = -104.85


 80%|█████████████████████████████▍       | 6371/8000 [1:34:20<22:35,  1.20it/s]

Episode 6371/8000, real env return = -104.99


 80%|█████████████████████████████▌       | 6381/8000 [1:34:29<22:59,  1.17it/s]

Episode 6381/8000, real env return = -104.57


 80%|█████████████████████████████▌       | 6391/8000 [1:34:38<23:39,  1.13it/s]

Episode 6391/8000, real env return = -103.34


 80%|█████████████████████████████▌       | 6401/8000 [1:34:46<22:20,  1.19it/s]

Episode 6401/8000, real env return = -108.61


 80%|█████████████████████████████▋       | 6411/8000 [1:34:55<22:32,  1.18it/s]

Episode 6411/8000, real env return = -107.72


 80%|█████████████████████████████▋       | 6421/8000 [1:35:03<22:20,  1.18it/s]

Episode 6421/8000, real env return = -104.93


 80%|█████████████████████████████▋       | 6431/8000 [1:35:12<22:42,  1.15it/s]

Episode 6431/8000, real env return = -104.78


 81%|█████████████████████████████▊       | 6441/8000 [1:35:20<21:59,  1.18it/s]

Episode 6441/8000, real env return = -104.71


 81%|█████████████████████████████▊       | 6451/8000 [1:35:28<21:31,  1.20it/s]

Episode 6451/8000, real env return = -104.28


 81%|█████████████████████████████▉       | 6461/8000 [1:35:37<22:36,  1.13it/s]

Episode 6461/8000, real env return = -104.57


 81%|█████████████████████████████▉       | 6471/8000 [1:35:45<21:09,  1.20it/s]

Episode 6471/8000, real env return = -104.74


 81%|█████████████████████████████▉       | 6481/8000 [1:35:54<21:07,  1.20it/s]

Episode 6481/8000, real env return = -104.99


 81%|██████████████████████████████       | 6491/8000 [1:36:02<22:04,  1.14it/s]

Episode 6491/8000, real env return = -105.14


 81%|██████████████████████████████       | 6501/8000 [1:36:11<20:55,  1.19it/s]

Episode 6501/8000, real env return = -105.16


 81%|██████████████████████████████       | 6511/8000 [1:36:19<21:04,  1.18it/s]

Episode 6511/8000, real env return = -105.03


 82%|██████████████████████████████▏      | 6521/8000 [1:36:28<20:34,  1.20it/s]

Episode 6521/8000, real env return = -105.17


 82%|██████████████████████████████▏      | 6531/8000 [1:36:36<20:26,  1.20it/s]

Episode 6531/8000, real env return = -104.95


 82%|██████████████████████████████▎      | 6541/8000 [1:36:44<20:00,  1.22it/s]

Episode 6541/8000, real env return = -104.69


 82%|██████████████████████████████▎      | 6551/8000 [1:36:53<20:29,  1.18it/s]

Episode 6551/8000, real env return = -104.58


 82%|██████████████████████████████▎      | 6561/8000 [1:37:02<21:06,  1.14it/s]

Episode 6561/8000, real env return = -104.74


 82%|██████████████████████████████▍      | 6571/8000 [1:37:10<20:51,  1.14it/s]

Episode 6571/8000, real env return = -104.59


 82%|██████████████████████████████▍      | 6581/8000 [1:37:18<19:50,  1.19it/s]

Episode 6581/8000, real env return = -105.01


 82%|██████████████████████████████▍      | 6591/8000 [1:37:27<20:02,  1.17it/s]

Episode 6591/8000, real env return = -105.00


 83%|██████████████████████████████▌      | 6601/8000 [1:37:36<20:25,  1.14it/s]

Episode 6601/8000, real env return = -105.25


 83%|██████████████████████████████▌      | 6611/8000 [1:37:44<20:32,  1.13it/s]

Episode 6611/8000, real env return = -105.00


 83%|██████████████████████████████▌      | 6621/8000 [1:37:53<19:48,  1.16it/s]

Episode 6621/8000, real env return = -105.28


 83%|██████████████████████████████▋      | 6631/8000 [1:38:01<19:24,  1.18it/s]

Episode 6631/8000, real env return = -105.44


 83%|██████████████████████████████▋      | 6641/8000 [1:38:10<19:14,  1.18it/s]

Episode 6641/8000, real env return = -105.33


 83%|██████████████████████████████▊      | 6651/8000 [1:38:18<18:59,  1.18it/s]

Episode 6651/8000, real env return = -105.82


 83%|██████████████████████████████▊      | 6661/8000 [1:38:27<19:20,  1.15it/s]

Episode 6661/8000, real env return = -105.26


 83%|██████████████████████████████▊      | 6671/8000 [1:38:35<19:16,  1.15it/s]

Episode 6671/8000, real env return = -105.67


 84%|██████████████████████████████▉      | 6681/8000 [1:38:44<18:59,  1.16it/s]

Episode 6681/8000, real env return = -106.32


 84%|██████████████████████████████▉      | 6691/8000 [1:38:52<18:26,  1.18it/s]

Episode 6691/8000, real env return = -106.32


 84%|██████████████████████████████▉      | 6701/8000 [1:39:01<18:34,  1.17it/s]

Episode 6701/8000, real env return = -105.99


 84%|███████████████████████████████      | 6711/8000 [1:39:10<18:52,  1.14it/s]

Episode 6711/8000, real env return = -106.30


 84%|███████████████████████████████      | 6721/8000 [1:39:18<18:17,  1.16it/s]

Episode 6721/8000, real env return = -106.09


 84%|███████████████████████████████▏     | 6731/8000 [1:39:27<18:14,  1.16it/s]

Episode 6731/8000, real env return = -106.46


 84%|███████████████████████████████▏     | 6741/8000 [1:39:35<18:13,  1.15it/s]

Episode 6741/8000, real env return = -105.50


 84%|███████████████████████████████▏     | 6751/8000 [1:39:44<18:16,  1.14it/s]

Episode 6751/8000, real env return = -106.04


 85%|███████████████████████████████▎     | 6761/8000 [1:39:53<17:40,  1.17it/s]

Episode 6761/8000, real env return = -106.29


 85%|███████████████████████████████▎     | 6771/8000 [1:40:01<17:29,  1.17it/s]

Episode 6771/8000, real env return = -106.34


 85%|███████████████████████████████▎     | 6781/8000 [1:40:09<17:32,  1.16it/s]

Episode 6781/8000, real env return = -105.81


 85%|███████████████████████████████▍     | 6791/8000 [1:40:18<17:06,  1.18it/s]

Episode 6791/8000, real env return = -106.39


 85%|███████████████████████████████▍     | 6801/8000 [1:40:26<16:36,  1.20it/s]

Episode 6801/8000, real env return = -107.75


 85%|███████████████████████████████▌     | 6811/8000 [1:40:35<16:49,  1.18it/s]

Episode 6811/8000, real env return = -106.03


 85%|███████████████████████████████▌     | 6821/8000 [1:40:43<16:30,  1.19it/s]

Episode 6821/8000, real env return = -105.73


 85%|███████████████████████████████▌     | 6831/8000 [1:40:52<17:25,  1.12it/s]

Episode 6831/8000, real env return = -105.97


 86%|███████████████████████████████▋     | 6841/8000 [1:41:01<16:15,  1.19it/s]

Episode 6841/8000, real env return = -106.06


 86%|███████████████████████████████▋     | 6851/8000 [1:41:09<16:09,  1.18it/s]

Episode 6851/8000, real env return = -105.65


 86%|███████████████████████████████▋     | 6861/8000 [1:41:18<16:26,  1.15it/s]

Episode 6861/8000, real env return = -106.06


 86%|███████████████████████████████▊     | 6871/8000 [1:41:26<16:18,  1.15it/s]

Episode 6871/8000, real env return = -106.17


 86%|███████████████████████████████▊     | 6881/8000 [1:41:35<15:45,  1.18it/s]

Episode 6881/8000, real env return = -105.77


 86%|███████████████████████████████▊     | 6891/8000 [1:41:43<15:59,  1.16it/s]

Episode 6891/8000, real env return = -105.92


 86%|███████████████████████████████▉     | 6901/8000 [1:41:52<16:28,  1.11it/s]

Episode 6901/8000, real env return = -106.26


 86%|███████████████████████████████▉     | 6911/8000 [1:42:01<15:15,  1.19it/s]

Episode 6911/8000, real env return = -106.08


 87%|████████████████████████████████     | 6921/8000 [1:42:10<15:09,  1.19it/s]

Episode 6921/8000, real env return = -105.86


 87%|████████████████████████████████     | 6931/8000 [1:42:18<15:02,  1.18it/s]

Episode 6931/8000, real env return = -105.75


 87%|████████████████████████████████     | 6941/8000 [1:42:26<14:50,  1.19it/s]

Episode 6941/8000, real env return = -106.08


 87%|████████████████████████████████▏    | 6951/8000 [1:42:35<15:11,  1.15it/s]

Episode 6951/8000, real env return = -105.13


 87%|████████████████████████████████▏    | 6961/8000 [1:42:43<14:25,  1.20it/s]

Episode 6961/8000, real env return = -104.85


 87%|████████████████████████████████▏    | 6971/8000 [1:42:52<14:53,  1.15it/s]

Episode 6971/8000, real env return = -105.28


 87%|████████████████████████████████▎    | 6981/8000 [1:43:01<14:44,  1.15it/s]

Episode 6981/8000, real env return = -105.93


 87%|████████████████████████████████▎    | 6991/8000 [1:43:09<14:43,  1.14it/s]

Episode 6991/8000, real env return = -104.96


 88%|████████████████████████████████▍    | 7001/8000 [1:43:18<14:39,  1.14it/s]

Episode 7001/8000, real env return = -106.65


 88%|████████████████████████████████▍    | 7011/8000 [1:43:26<13:59,  1.18it/s]

Episode 7011/8000, real env return = -106.13


 88%|████████████████████████████████▍    | 7021/8000 [1:43:35<14:10,  1.15it/s]

Episode 7021/8000, real env return = -106.04


 88%|████████████████████████████████▌    | 7031/8000 [1:43:43<13:27,  1.20it/s]

Episode 7031/8000, real env return = -104.89


 88%|████████████████████████████████▌    | 7041/8000 [1:43:52<13:22,  1.20it/s]

Episode 7041/8000, real env return = -106.33


 88%|████████████████████████████████▌    | 7051/8000 [1:44:00<13:04,  1.21it/s]

Episode 7051/8000, real env return = -105.92


 88%|████████████████████████████████▋    | 7061/8000 [1:44:08<13:14,  1.18it/s]

Episode 7061/8000, real env return = -105.97


 88%|████████████████████████████████▋    | 7071/8000 [1:44:17<13:05,  1.18it/s]

Episode 7071/8000, real env return = -106.07


 89%|████████████████████████████████▋    | 7081/8000 [1:44:26<13:57,  1.10it/s]

Episode 7081/8000, real env return = -107.20


 89%|████████████████████████████████▊    | 7091/8000 [1:44:34<13:08,  1.15it/s]

Episode 7091/8000, real env return = -106.13


 89%|████████████████████████████████▊    | 7101/8000 [1:44:43<12:28,  1.20it/s]

Episode 7101/8000, real env return = -106.04


 89%|████████████████████████████████▉    | 7111/8000 [1:44:51<12:14,  1.21it/s]

Episode 7111/8000, real env return = -105.68


 89%|████████████████████████████████▉    | 7121/8000 [1:45:00<12:50,  1.14it/s]

Episode 7121/8000, real env return = -105.62


 89%|████████████████████████████████▉    | 7131/8000 [1:45:08<12:06,  1.20it/s]

Episode 7131/8000, real env return = -105.57


 89%|█████████████████████████████████    | 7141/8000 [1:45:17<12:18,  1.16it/s]

Episode 7141/8000, real env return = -105.64


 89%|█████████████████████████████████    | 7151/8000 [1:45:25<11:53,  1.19it/s]

Episode 7151/8000, real env return = -105.41


 90%|█████████████████████████████████    | 7161/8000 [1:45:34<11:50,  1.18it/s]

Episode 7161/8000, real env return = -103.91


 90%|█████████████████████████████████▏   | 7171/8000 [1:45:42<11:47,  1.17it/s]

Episode 7171/8000, real env return = -102.27


 90%|█████████████████████████████████▏   | 7181/8000 [1:45:51<11:28,  1.19it/s]

Episode 7181/8000, real env return = -105.91


 90%|█████████████████████████████████▎   | 7191/8000 [1:46:00<11:23,  1.18it/s]

Episode 7191/8000, real env return = -105.69


 90%|█████████████████████████████████▎   | 7201/8000 [1:46:08<11:27,  1.16it/s]

Episode 7201/8000, real env return = -105.65


 90%|█████████████████████████████████▎   | 7211/8000 [1:46:17<11:24,  1.15it/s]

Episode 7211/8000, real env return = -105.89


 90%|█████████████████████████████████▍   | 7221/8000 [1:46:25<11:00,  1.18it/s]

Episode 7221/8000, real env return = -105.53


 90%|█████████████████████████████████▍   | 7231/8000 [1:46:34<10:54,  1.17it/s]

Episode 7231/8000, real env return = -106.00


 91%|█████████████████████████████████▍   | 7241/8000 [1:46:43<11:05,  1.14it/s]

Episode 7241/8000, real env return = -107.65


 91%|█████████████████████████████████▌   | 7251/8000 [1:46:51<10:51,  1.15it/s]

Episode 7251/8000, real env return = -106.11


 91%|█████████████████████████████████▌   | 7261/8000 [1:47:00<10:55,  1.13it/s]

Episode 7261/8000, real env return = -106.00


 91%|█████████████████████████████████▋   | 7271/8000 [1:47:09<11:30,  1.06it/s]

Episode 7271/8000, real env return = -106.34


 91%|█████████████████████████████████▋   | 7281/8000 [1:47:18<10:56,  1.09it/s]

Episode 7281/8000, real env return = -105.54


 91%|█████████████████████████████████▋   | 7291/8000 [1:47:27<10:24,  1.14it/s]

Episode 7291/8000, real env return = -105.66


 91%|█████████████████████████████████▊   | 7301/8000 [1:47:36<10:28,  1.11it/s]

Episode 7301/8000, real env return = -105.25


 91%|█████████████████████████████████▊   | 7311/8000 [1:47:45<10:16,  1.12it/s]

Episode 7311/8000, real env return = -105.65


 92%|█████████████████████████████████▊   | 7321/8000 [1:47:54<10:00,  1.13it/s]

Episode 7321/8000, real env return = -106.00


 92%|█████████████████████████████████▉   | 7331/8000 [1:48:03<09:37,  1.16it/s]

Episode 7331/8000, real env return = -105.07


 92%|█████████████████████████████████▉   | 7341/8000 [1:48:12<09:50,  1.12it/s]

Episode 7341/8000, real env return = -128.36


 92%|█████████████████████████████████▉   | 7351/8000 [1:48:21<09:37,  1.12it/s]

Episode 7351/8000, real env return = -112.10


 92%|██████████████████████████████████   | 7361/8000 [1:48:30<09:34,  1.11it/s]

Episode 7361/8000, real env return = -108.29


 92%|██████████████████████████████████   | 7371/8000 [1:48:39<09:12,  1.14it/s]

Episode 7371/8000, real env return = -107.34


 92%|██████████████████████████████████▏  | 7381/8000 [1:48:47<09:14,  1.12it/s]

Episode 7381/8000, real env return = -105.66


 92%|██████████████████████████████████▏  | 7391/8000 [1:48:56<09:01,  1.13it/s]

Episode 7391/8000, real env return = -105.35


 93%|██████████████████████████████████▏  | 7401/8000 [1:49:05<08:42,  1.15it/s]

Episode 7401/8000, real env return = -105.64


 93%|██████████████████████████████████▎  | 7411/8000 [1:49:14<08:42,  1.13it/s]

Episode 7411/8000, real env return = -105.68


 93%|██████████████████████████████████▎  | 7421/8000 [1:49:23<08:04,  1.20it/s]

Episode 7421/8000, real env return = -107.72


 93%|██████████████████████████████████▎  | 7431/8000 [1:49:31<08:20,  1.14it/s]

Episode 7431/8000, real env return = -108.66


 93%|██████████████████████████████████▍  | 7441/8000 [1:49:40<08:05,  1.15it/s]

Episode 7441/8000, real env return = -105.27


 93%|██████████████████████████████████▍  | 7451/8000 [1:49:49<07:48,  1.17it/s]

Episode 7451/8000, real env return = -105.61


 93%|██████████████████████████████████▌  | 7461/8000 [1:49:58<07:44,  1.16it/s]

Episode 7461/8000, real env return = -105.43


 93%|██████████████████████████████████▌  | 7471/8000 [1:50:06<07:08,  1.23it/s]

Episode 7471/8000, real env return = -105.57


 94%|██████████████████████████████████▌  | 7481/8000 [1:50:15<07:31,  1.15it/s]

Episode 7481/8000, real env return = -105.56


 94%|██████████████████████████████████▋  | 7491/8000 [1:50:24<07:23,  1.15it/s]

Episode 7491/8000, real env return = -105.29


 94%|██████████████████████████████████▋  | 7501/8000 [1:50:33<07:02,  1.18it/s]

Episode 7501/8000, real env return = -106.97


 94%|██████████████████████████████████▋  | 7511/8000 [1:50:42<07:29,  1.09it/s]

Episode 7511/8000, real env return = -106.14


 94%|██████████████████████████████████▊  | 7521/8000 [1:50:51<07:31,  1.06it/s]

Episode 7521/8000, real env return = -105.76


 94%|██████████████████████████████████▊  | 7531/8000 [1:50:59<06:47,  1.15it/s]

Episode 7531/8000, real env return = -104.95


 94%|██████████████████████████████████▉  | 7541/8000 [1:51:08<06:48,  1.12it/s]

Episode 7541/8000, real env return = -105.93


 94%|██████████████████████████████████▉  | 7551/8000 [1:51:17<06:16,  1.19it/s]

Episode 7551/8000, real env return = -105.59


 95%|██████████████████████████████████▉  | 7561/8000 [1:51:26<06:31,  1.12it/s]

Episode 7561/8000, real env return = -106.25


 95%|███████████████████████████████████  | 7571/8000 [1:51:34<06:08,  1.16it/s]

Episode 7571/8000, real env return = -106.16


 95%|███████████████████████████████████  | 7581/8000 [1:51:43<05:52,  1.19it/s]

Episode 7581/8000, real env return = -106.31


 95%|███████████████████████████████████  | 7591/8000 [1:51:52<05:44,  1.19it/s]

Episode 7591/8000, real env return = -106.08


 95%|███████████████████████████████████▏ | 7601/8000 [1:52:00<05:44,  1.16it/s]

Episode 7601/8000, real env return = -105.63


 95%|███████████████████████████████████▏ | 7611/8000 [1:52:09<05:30,  1.18it/s]

Episode 7611/8000, real env return = -105.26


 95%|███████████████████████████████████▏ | 7621/8000 [1:52:17<05:19,  1.19it/s]

Episode 7621/8000, real env return = -105.88


 95%|███████████████████████████████████▎ | 7631/8000 [1:52:26<05:08,  1.20it/s]

Episode 7631/8000, real env return = -105.77


 96%|███████████████████████████████████▎ | 7641/8000 [1:52:35<05:13,  1.14it/s]

Episode 7641/8000, real env return = -105.82


 96%|███████████████████████████████████▍ | 7651/8000 [1:52:43<05:02,  1.15it/s]

Episode 7651/8000, real env return = -105.77


 96%|███████████████████████████████████▍ | 7661/8000 [1:52:52<04:48,  1.17it/s]

Episode 7661/8000, real env return = -105.90


 96%|███████████████████████████████████▍ | 7671/8000 [1:53:00<04:42,  1.16it/s]

Episode 7671/8000, real env return = -105.14


 96%|███████████████████████████████████▌ | 7681/8000 [1:53:09<04:50,  1.10it/s]

Episode 7681/8000, real env return = -106.12


 96%|███████████████████████████████████▌ | 7691/8000 [1:53:18<04:44,  1.09it/s]

Episode 7691/8000, real env return = -105.29


 96%|███████████████████████████████████▌ | 7701/8000 [1:53:26<04:17,  1.16it/s]

Episode 7701/8000, real env return = -105.81


 96%|███████████████████████████████████▋ | 7711/8000 [1:53:35<04:08,  1.16it/s]

Episode 7711/8000, real env return = -105.39


 97%|███████████████████████████████████▋ | 7721/8000 [1:53:44<04:26,  1.05it/s]

Episode 7721/8000, real env return = -106.69


 97%|███████████████████████████████████▊ | 7731/8000 [1:53:53<04:02,  1.11it/s]

Episode 7731/8000, real env return = -105.89


 97%|███████████████████████████████████▊ | 7741/8000 [1:54:02<03:39,  1.18it/s]

Episode 7741/8000, real env return = -105.67


 97%|███████████████████████████████████▊ | 7751/8000 [1:54:10<03:33,  1.17it/s]

Episode 7751/8000, real env return = -106.26


 97%|███████████████████████████████████▉ | 7761/8000 [1:54:19<03:23,  1.18it/s]

Episode 7761/8000, real env return = -105.70


 97%|███████████████████████████████████▉ | 7771/8000 [1:54:27<03:10,  1.20it/s]

Episode 7771/8000, real env return = -105.13


 97%|███████████████████████████████████▉ | 7781/8000 [1:54:35<03:02,  1.20it/s]

Episode 7781/8000, real env return = -105.95


 97%|████████████████████████████████████ | 7791/8000 [1:54:44<03:02,  1.15it/s]

Episode 7791/8000, real env return = -105.82


 98%|████████████████████████████████████ | 7801/8000 [1:54:53<02:47,  1.19it/s]

Episode 7801/8000, real env return = -104.70


 98%|████████████████████████████████████▏| 7811/8000 [1:55:01<02:45,  1.14it/s]

Episode 7811/8000, real env return = -104.76


 98%|████████████████████████████████████▏| 7821/8000 [1:55:10<02:28,  1.21it/s]

Episode 7821/8000, real env return = -105.15


 98%|████████████████████████████████████▏| 7831/8000 [1:55:18<02:23,  1.18it/s]

Episode 7831/8000, real env return = -104.74


 98%|████████████████████████████████████▎| 7841/8000 [1:55:27<02:13,  1.19it/s]

Episode 7841/8000, real env return = -105.46


 98%|████████████████████████████████████▎| 7851/8000 [1:55:35<02:07,  1.17it/s]

Episode 7851/8000, real env return = -106.91


 98%|████████████████████████████████████▎| 7861/8000 [1:55:44<02:01,  1.15it/s]

Episode 7861/8000, real env return = -106.45


 98%|████████████████████████████████████▍| 7871/8000 [1:55:53<01:53,  1.13it/s]

Episode 7871/8000, real env return = -106.25


 99%|████████████████████████████████████▍| 7881/8000 [1:56:01<01:42,  1.16it/s]

Episode 7881/8000, real env return = -106.29


 99%|████████████████████████████████████▍| 7891/8000 [1:56:09<01:31,  1.19it/s]

Episode 7891/8000, real env return = -105.65


 99%|████████████████████████████████████▌| 7901/8000 [1:56:18<01:26,  1.14it/s]

Episode 7901/8000, real env return = -105.86


 99%|████████████████████████████████████▌| 7911/8000 [1:56:26<01:14,  1.19it/s]

Episode 7911/8000, real env return = -106.01


 99%|████████████████████████████████████▋| 7921/8000 [1:56:35<01:08,  1.15it/s]

Episode 7921/8000, real env return = -105.34


 99%|████████████████████████████████████▋| 7931/8000 [1:56:43<00:58,  1.18it/s]

Episode 7931/8000, real env return = -106.27


 99%|████████████████████████████████████▋| 7941/8000 [1:56:52<00:52,  1.13it/s]

Episode 7941/8000, real env return = -106.13


 99%|████████████████████████████████████▊| 7951/8000 [1:57:01<00:43,  1.12it/s]

Episode 7951/8000, real env return = -106.11


100%|████████████████████████████████████▊| 7961/8000 [1:57:09<00:33,  1.16it/s]

Episode 7961/8000, real env return = -105.92


100%|████████████████████████████████████▊| 7971/8000 [1:57:18<00:24,  1.17it/s]

Episode 7971/8000, real env return = -106.27


100%|████████████████████████████████████▉| 7981/8000 [1:57:26<00:16,  1.17it/s]

Episode 7981/8000, real env return = -106.10


100%|████████████████████████████████████▉| 7991/8000 [1:57:35<00:07,  1.16it/s]

Episode 7991/8000, real env return = -106.62


100%|█████████████████████████████████████| 8000/8000 [1:57:43<00:00,  1.13it/s]

Training finished.





In [7]:
train5 = main()

Using cuda device


  0%|                                          | 2/8000 [00:00<08:55, 14.92it/s]

Episode 1/8000, real env return = -103.73


  0%|                                       | 11/8000 [00:10<2:34:07,  1.16s/it]

Episode 11/8000, real env return = -109.32


  0%|                                       | 21/8000 [00:24<3:32:07,  1.60s/it]

Episode 21/8000, real env return = -83.38


  0%|▏                                      | 31/8000 [00:35<2:10:08,  1.02it/s]

Episode 31/8000, real env return = -101.99


  1%|▏                                      | 41/8000 [00:44<2:01:49,  1.09it/s]

Episode 41/8000, real env return = -117.09


  1%|▏                                      | 51/8000 [00:53<2:03:46,  1.07it/s]

Episode 51/8000, real env return = -110.97


  1%|▎                                      | 61/8000 [01:10<3:56:50,  1.79s/it]

Episode 61/8000, real env return = -197.66


  1%|▎                                      | 71/8000 [01:29<4:39:51,  2.12s/it]

Episode 71/8000, real env return = -44.06


  1%|▍                                      | 81/8000 [01:50<4:25:04,  2.01s/it]

Episode 81/8000, real env return = -206.20


  1%|▍                                      | 91/8000 [02:00<2:02:09,  1.08it/s]

Episode 91/8000, real env return = -110.02


  1%|▍                                     | 101/8000 [02:13<3:05:06,  1.41s/it]

Episode 101/8000, real env return = -91.73


  1%|▌                                     | 111/8000 [02:23<2:01:08,  1.09it/s]

Episode 111/8000, real env return = -105.97


  2%|▌                                     | 121/8000 [02:41<4:25:53,  2.02s/it]

Episode 121/8000, real env return = -142.19


  2%|▌                                     | 131/8000 [02:55<3:11:04,  1.46s/it]

Episode 131/8000, real env return = -139.91


  2%|▋                                     | 141/8000 [03:14<4:30:57,  2.07s/it]

Episode 141/8000, real env return = -66.86


  2%|▋                                     | 151/8000 [03:36<4:42:04,  2.16s/it]

Episode 151/8000, real env return = -128.18


  2%|▊                                     | 161/8000 [03:57<4:36:05,  2.11s/it]

Episode 161/8000, real env return = -164.50


  2%|▊                                     | 171/8000 [04:17<4:26:35,  2.04s/it]

Episode 171/8000, real env return = -136.07


  2%|▊                                     | 181/8000 [04:32<2:21:31,  1.09s/it]

Episode 181/8000, real env return = -128.17


  2%|▉                                     | 191/8000 [04:45<3:36:19,  1.66s/it]

Episode 191/8000, real env return = -107.31


  3%|▉                                     | 201/8000 [05:01<3:51:45,  1.78s/it]

Episode 201/8000, real env return = -77.04


  3%|█                                     | 211/8000 [05:21<4:35:29,  2.12s/it]

Episode 211/8000, real env return = -91.81


  3%|█                                     | 221/8000 [05:42<4:30:45,  2.09s/it]

Episode 221/8000, real env return = -103.24


  3%|█                                     | 231/8000 [06:01<3:58:43,  1.84s/it]

Episode 231/8000, real env return = -103.02


  3%|█▏                                    | 241/8000 [06:18<4:08:43,  1.92s/it]

Episode 241/8000, real env return = -98.13


  3%|█▏                                    | 251/8000 [06:39<4:30:31,  2.09s/it]

Episode 251/8000, real env return = -112.21


  3%|█▏                                    | 261/8000 [06:56<3:51:58,  1.80s/it]

Episode 261/8000, real env return = -106.49


  3%|█▎                                    | 271/8000 [07:12<4:02:54,  1.89s/it]

Episode 271/8000, real env return = -114.25


  4%|█▎                                    | 281/8000 [07:30<3:23:19,  1.58s/it]

Episode 281/8000, real env return = -113.94


  4%|█▍                                    | 291/8000 [07:44<2:59:50,  1.40s/it]

Episode 291/8000, real env return = -103.65


  4%|█▍                                    | 301/8000 [07:55<2:03:40,  1.04it/s]

Episode 301/8000, real env return = -130.66


  4%|█▍                                    | 311/8000 [08:04<1:53:57,  1.12it/s]

Episode 311/8000, real env return = -129.80


  4%|█▌                                    | 321/8000 [08:13<2:01:41,  1.05it/s]

Episode 321/8000, real env return = -129.31


  4%|█▌                                    | 331/8000 [08:22<1:53:30,  1.13it/s]

Episode 331/8000, real env return = -128.25


  4%|█▌                                    | 341/8000 [08:31<1:55:58,  1.10it/s]

Episode 341/8000, real env return = -131.31


  4%|█▋                                    | 351/8000 [08:40<1:55:44,  1.10it/s]

Episode 351/8000, real env return = -129.78


  5%|█▋                                    | 361/8000 [08:49<1:52:30,  1.13it/s]

Episode 361/8000, real env return = -130.38


  5%|█▊                                    | 371/8000 [08:58<1:57:12,  1.08it/s]

Episode 371/8000, real env return = -128.93


  5%|█▊                                    | 381/8000 [09:07<1:50:08,  1.15it/s]

Episode 381/8000, real env return = -127.09


  5%|█▊                                    | 391/8000 [09:16<1:52:34,  1.13it/s]

Episode 391/8000, real env return = -122.18


  5%|█▉                                    | 401/8000 [09:26<2:02:16,  1.04it/s]

Episode 401/8000, real env return = -128.92


  5%|█▉                                    | 411/8000 [09:35<1:53:29,  1.11it/s]

Episode 411/8000, real env return = -108.76


  5%|█▉                                    | 421/8000 [09:44<1:50:43,  1.14it/s]

Episode 421/8000, real env return = -110.92


  5%|██                                    | 431/8000 [09:52<1:52:10,  1.12it/s]

Episode 431/8000, real env return = -122.72


  6%|██                                    | 441/8000 [10:01<1:52:52,  1.12it/s]

Episode 441/8000, real env return = -122.73


  6%|██▏                                   | 451/8000 [10:10<1:45:47,  1.19it/s]

Episode 451/8000, real env return = -123.15


  6%|██▏                                   | 461/8000 [10:18<1:48:13,  1.16it/s]

Episode 461/8000, real env return = -122.29


  6%|██▏                                   | 471/8000 [10:27<1:51:19,  1.13it/s]

Episode 471/8000, real env return = -121.97


  6%|██▎                                   | 481/8000 [10:36<1:47:20,  1.17it/s]

Episode 481/8000, real env return = -122.21


  6%|██▎                                   | 491/8000 [10:44<1:52:26,  1.11it/s]

Episode 491/8000, real env return = -117.12


  6%|██▍                                   | 501/8000 [10:53<1:51:49,  1.12it/s]

Episode 501/8000, real env return = -122.38


  6%|██▍                                   | 511/8000 [11:02<1:51:57,  1.11it/s]

Episode 511/8000, real env return = -122.55


  7%|██▍                                   | 521/8000 [11:12<1:57:02,  1.06it/s]

Episode 521/8000, real env return = -123.34


  7%|██▌                                   | 531/8000 [11:20<1:48:55,  1.14it/s]

Episode 531/8000, real env return = -123.92


  7%|██▌                                   | 541/8000 [11:29<1:43:30,  1.20it/s]

Episode 541/8000, real env return = -123.02


  7%|██▌                                   | 551/8000 [11:37<1:44:49,  1.18it/s]

Episode 551/8000, real env return = -122.56


  7%|██▋                                   | 561/8000 [11:46<1:50:28,  1.12it/s]

Episode 561/8000, real env return = -122.69


  7%|██▋                                   | 571/8000 [11:54<1:47:20,  1.15it/s]

Episode 571/8000, real env return = -122.58


  7%|██▊                                   | 581/8000 [12:03<1:43:54,  1.19it/s]

Episode 581/8000, real env return = -121.72


  7%|██▊                                   | 591/8000 [12:12<1:46:06,  1.16it/s]

Episode 591/8000, real env return = -121.76


  8%|██▊                                   | 601/8000 [12:20<1:44:00,  1.19it/s]

Episode 601/8000, real env return = -121.12


  8%|██▉                                   | 611/8000 [12:29<1:43:17,  1.19it/s]

Episode 611/8000, real env return = -123.37


  8%|██▉                                   | 621/8000 [12:38<1:47:33,  1.14it/s]

Episode 621/8000, real env return = -121.20


  8%|██▉                                   | 631/8000 [12:46<1:44:40,  1.17it/s]

Episode 631/8000, real env return = -121.98


  8%|███                                   | 641/8000 [12:55<1:42:59,  1.19it/s]

Episode 641/8000, real env return = -122.34


  8%|███                                   | 651/8000 [13:03<1:48:05,  1.13it/s]

Episode 651/8000, real env return = -121.18


  8%|███▏                                  | 661/8000 [13:12<1:43:50,  1.18it/s]

Episode 661/8000, real env return = -122.35


  8%|███▏                                  | 671/8000 [13:21<1:46:37,  1.15it/s]

Episode 671/8000, real env return = -110.25


  9%|███▏                                  | 681/8000 [13:29<1:45:11,  1.16it/s]

Episode 681/8000, real env return = -122.56


  9%|███▎                                  | 691/8000 [13:38<1:48:23,  1.12it/s]

Episode 691/8000, real env return = -122.22


  9%|███▎                                  | 701/8000 [13:47<1:44:22,  1.17it/s]

Episode 701/8000, real env return = -122.20


  9%|███▍                                  | 711/8000 [13:56<1:48:29,  1.12it/s]

Episode 711/8000, real env return = -122.76


  9%|███▍                                  | 721/8000 [14:04<1:47:44,  1.13it/s]

Episode 721/8000, real env return = -122.11


  9%|███▍                                  | 731/8000 [14:13<1:44:20,  1.16it/s]

Episode 731/8000, real env return = -122.90


  9%|███▌                                  | 741/8000 [14:21<1:39:49,  1.21it/s]

Episode 741/8000, real env return = -122.79


  9%|███▌                                  | 751/8000 [14:30<1:41:36,  1.19it/s]

Episode 751/8000, real env return = -122.43


 10%|███▌                                  | 761/8000 [14:38<1:42:13,  1.18it/s]

Episode 761/8000, real env return = -122.92


 10%|███▋                                  | 771/8000 [14:47<1:44:53,  1.15it/s]

Episode 771/8000, real env return = -122.85


 10%|███▋                                  | 781/8000 [14:56<1:46:00,  1.13it/s]

Episode 781/8000, real env return = -123.24


 10%|███▊                                  | 791/8000 [15:04<1:41:35,  1.18it/s]

Episode 791/8000, real env return = -122.51


 10%|███▊                                  | 801/8000 [15:12<1:41:33,  1.18it/s]

Episode 801/8000, real env return = -121.97


 10%|███▊                                  | 811/8000 [15:21<1:45:46,  1.13it/s]

Episode 811/8000, real env return = -123.52


 10%|███▉                                  | 821/8000 [15:30<1:41:20,  1.18it/s]

Episode 821/8000, real env return = -121.90


 10%|███▉                                  | 831/8000 [15:38<1:39:55,  1.20it/s]

Episode 831/8000, real env return = -122.31


 11%|███▉                                  | 841/8000 [15:47<1:41:51,  1.17it/s]

Episode 841/8000, real env return = -121.62


 11%|████                                  | 851/8000 [15:55<1:40:58,  1.18it/s]

Episode 851/8000, real env return = -121.56


 11%|████                                  | 861/8000 [16:04<1:46:03,  1.12it/s]

Episode 861/8000, real env return = -122.32


 11%|████▏                                 | 871/8000 [16:13<1:43:10,  1.15it/s]

Episode 871/8000, real env return = -121.83


 11%|████▏                                 | 881/8000 [16:22<1:42:46,  1.15it/s]

Episode 881/8000, real env return = -121.31


 11%|████▏                                 | 891/8000 [16:30<1:42:45,  1.15it/s]

Episode 891/8000, real env return = -122.02


 11%|████▎                                 | 901/8000 [16:39<1:41:23,  1.17it/s]

Episode 901/8000, real env return = -122.11


 11%|████▎                                 | 911/8000 [16:47<1:39:35,  1.19it/s]

Episode 911/8000, real env return = -118.99


 12%|████▎                                 | 921/8000 [16:56<1:37:44,  1.21it/s]

Episode 921/8000, real env return = -115.96


 12%|████▍                                 | 931/8000 [17:04<1:39:15,  1.19it/s]

Episode 931/8000, real env return = -112.69


 12%|████▍                                 | 941/8000 [17:13<1:41:38,  1.16it/s]

Episode 941/8000, real env return = -113.23


 12%|████▌                                 | 951/8000 [17:22<1:43:13,  1.14it/s]

Episode 951/8000, real env return = -113.13


 12%|████▌                                 | 961/8000 [17:30<1:39:27,  1.18it/s]

Episode 961/8000, real env return = -112.06


 12%|████▌                                 | 971/8000 [17:39<1:39:02,  1.18it/s]

Episode 971/8000, real env return = -112.68


 12%|████▋                                 | 981/8000 [17:47<1:38:25,  1.19it/s]

Episode 981/8000, real env return = -111.89


 12%|████▋                                 | 991/8000 [17:56<1:39:54,  1.17it/s]

Episode 991/8000, real env return = -111.55


 13%|████▋                                | 1001/8000 [18:04<1:38:25,  1.19it/s]

Episode 1001/8000, real env return = -120.33


 13%|████▋                                | 1011/8000 [18:13<1:41:12,  1.15it/s]

Episode 1011/8000, real env return = -111.38


 13%|████▋                                | 1021/8000 [18:22<1:43:06,  1.13it/s]

Episode 1021/8000, real env return = -110.61


 13%|████▊                                | 1031/8000 [18:30<1:34:18,  1.23it/s]

Episode 1031/8000, real env return = -110.94


 13%|████▊                                | 1041/8000 [18:38<1:35:41,  1.21it/s]

Episode 1041/8000, real env return = -111.19


 13%|████▊                                | 1051/8000 [18:47<1:36:53,  1.20it/s]

Episode 1051/8000, real env return = -110.74


 13%|████▉                                | 1061/8000 [18:55<1:40:58,  1.15it/s]

Episode 1061/8000, real env return = -110.84


 13%|████▉                                | 1071/8000 [19:04<1:42:15,  1.13it/s]

Episode 1071/8000, real env return = -110.82


 14%|████▉                                | 1081/8000 [19:13<1:38:03,  1.18it/s]

Episode 1081/8000, real env return = -110.40


 14%|█████                                | 1091/8000 [19:21<1:38:42,  1.17it/s]

Episode 1091/8000, real env return = -110.86


 14%|█████                                | 1101/8000 [19:30<1:40:31,  1.14it/s]

Episode 1101/8000, real env return = -110.77


 14%|█████▏                               | 1111/8000 [19:38<1:36:28,  1.19it/s]

Episode 1111/8000, real env return = -110.52


 14%|█████▏                               | 1121/8000 [19:47<1:40:07,  1.15it/s]

Episode 1121/8000, real env return = -110.12


 14%|█████▏                               | 1131/8000 [19:55<1:40:48,  1.14it/s]

Episode 1131/8000, real env return = -110.14


 14%|█████▎                               | 1141/8000 [20:04<1:36:24,  1.19it/s]

Episode 1141/8000, real env return = -110.75


 14%|█████▎                               | 1151/8000 [20:12<1:33:13,  1.22it/s]

Episode 1151/8000, real env return = -111.54


 15%|█████▎                               | 1161/8000 [20:21<1:39:36,  1.14it/s]

Episode 1161/8000, real env return = -110.83


 15%|█████▍                               | 1171/8000 [20:29<1:40:48,  1.13it/s]

Episode 1171/8000, real env return = -110.70


 15%|█████▍                               | 1181/8000 [20:38<1:39:38,  1.14it/s]

Episode 1181/8000, real env return = -110.57


 15%|█████▌                               | 1191/8000 [20:46<1:34:24,  1.20it/s]

Episode 1191/8000, real env return = -110.95


 15%|█████▌                               | 1201/8000 [20:55<1:32:42,  1.22it/s]

Episode 1201/8000, real env return = -109.92


 15%|█████▌                               | 1211/8000 [21:03<1:35:35,  1.18it/s]

Episode 1211/8000, real env return = -110.42


 15%|█████▋                               | 1221/8000 [21:12<1:38:55,  1.14it/s]

Episode 1221/8000, real env return = -110.92


 15%|█████▋                               | 1231/8000 [21:20<1:32:56,  1.21it/s]

Episode 1231/8000, real env return = -110.90


 16%|█████▋                               | 1241/8000 [21:29<1:41:33,  1.11it/s]

Episode 1241/8000, real env return = -111.23


 16%|█████▊                               | 1251/8000 [21:38<1:39:10,  1.13it/s]

Episode 1251/8000, real env return = -111.06


 16%|█████▊                               | 1261/8000 [21:46<1:37:57,  1.15it/s]

Episode 1261/8000, real env return = -111.45


 16%|█████▉                               | 1271/8000 [21:55<1:34:46,  1.18it/s]

Episode 1271/8000, real env return = -112.08


 16%|█████▉                               | 1281/8000 [22:03<1:34:45,  1.18it/s]

Episode 1281/8000, real env return = -114.33


 16%|█████▉                               | 1291/8000 [22:12<1:34:26,  1.18it/s]

Episode 1291/8000, real env return = -111.94


 16%|██████                               | 1301/8000 [22:21<1:36:14,  1.16it/s]

Episode 1301/8000, real env return = -110.48


 16%|██████                               | 1311/8000 [22:29<1:34:19,  1.18it/s]

Episode 1311/8000, real env return = -110.34


 17%|██████                               | 1321/8000 [22:37<1:31:08,  1.22it/s]

Episode 1321/8000, real env return = -110.13


 17%|██████▏                              | 1331/8000 [22:46<1:32:23,  1.20it/s]

Episode 1331/8000, real env return = -120.76


 17%|██████▏                              | 1341/8000 [22:54<1:36:37,  1.15it/s]

Episode 1341/8000, real env return = -118.63


 17%|██████▏                              | 1351/8000 [23:03<1:33:17,  1.19it/s]

Episode 1351/8000, real env return = -111.36


 17%|██████▎                              | 1361/8000 [23:11<1:33:32,  1.18it/s]

Episode 1361/8000, real env return = -110.39


 17%|██████▎                              | 1371/8000 [23:20<1:33:16,  1.18it/s]

Episode 1371/8000, real env return = -110.72


 17%|██████▍                              | 1381/8000 [23:29<1:37:09,  1.14it/s]

Episode 1381/8000, real env return = -110.88


 17%|██████▍                              | 1391/8000 [23:37<1:33:14,  1.18it/s]

Episode 1391/8000, real env return = -109.89


 18%|██████▍                              | 1401/8000 [23:46<1:36:25,  1.14it/s]

Episode 1401/8000, real env return = -109.71


 18%|██████▌                              | 1411/8000 [23:55<1:34:35,  1.16it/s]

Episode 1411/8000, real env return = -109.82


 18%|██████▌                              | 1421/8000 [24:03<1:32:16,  1.19it/s]

Episode 1421/8000, real env return = -110.57


 18%|██████▌                              | 1431/8000 [24:12<1:35:40,  1.14it/s]

Episode 1431/8000, real env return = -110.69


 18%|██████▋                              | 1441/8000 [24:20<1:31:33,  1.19it/s]

Episode 1441/8000, real env return = -109.92


 18%|██████▋                              | 1451/8000 [24:29<1:31:20,  1.19it/s]

Episode 1451/8000, real env return = -109.99


 18%|██████▊                              | 1461/8000 [24:38<1:35:53,  1.14it/s]

Episode 1461/8000, real env return = -111.05


 18%|██████▊                              | 1471/8000 [24:46<1:31:23,  1.19it/s]

Episode 1471/8000, real env return = -111.31


 19%|██████▊                              | 1481/8000 [24:55<1:30:38,  1.20it/s]

Episode 1481/8000, real env return = -110.97


 19%|██████▉                              | 1491/8000 [25:03<1:33:37,  1.16it/s]

Episode 1491/8000, real env return = -111.81


 19%|██████▉                              | 1501/8000 [25:12<1:32:51,  1.17it/s]

Episode 1501/8000, real env return = -111.86


 19%|██████▉                              | 1511/8000 [25:20<1:30:23,  1.20it/s]

Episode 1511/8000, real env return = -111.82


 19%|███████                              | 1521/8000 [25:29<1:31:46,  1.18it/s]

Episode 1521/8000, real env return = -112.35


 19%|███████                              | 1531/8000 [25:37<1:30:04,  1.20it/s]

Episode 1531/8000, real env return = -111.11


 19%|███████▏                             | 1541/8000 [25:46<1:32:39,  1.16it/s]

Episode 1541/8000, real env return = -111.73


 19%|███████▏                             | 1551/8000 [25:54<1:34:14,  1.14it/s]

Episode 1551/8000, real env return = -111.70


 20%|███████▏                             | 1561/8000 [26:03<1:35:10,  1.13it/s]

Episode 1561/8000, real env return = -111.51


 20%|███████▎                             | 1571/8000 [26:12<1:29:47,  1.19it/s]

Episode 1571/8000, real env return = -110.54


 20%|███████▎                             | 1581/8000 [26:20<1:30:26,  1.18it/s]

Episode 1581/8000, real env return = -109.97


 20%|███████▎                             | 1591/8000 [26:29<1:32:06,  1.16it/s]

Episode 1591/8000, real env return = -110.31


 20%|███████▍                             | 1601/8000 [26:37<1:34:01,  1.13it/s]

Episode 1601/8000, real env return = -110.66


 20%|███████▍                             | 1611/8000 [26:46<1:33:59,  1.13it/s]

Episode 1611/8000, real env return = -110.69


 20%|███████▍                             | 1621/8000 [26:54<1:28:14,  1.20it/s]

Episode 1621/8000, real env return = -110.25


 20%|███████▌                             | 1631/8000 [27:03<1:33:22,  1.14it/s]

Episode 1631/8000, real env return = -110.66


 21%|███████▌                             | 1641/8000 [27:15<2:32:19,  1.44s/it]

Episode 1641/8000, real env return = -107.14


 21%|███████▋                             | 1651/8000 [27:24<1:31:30,  1.16it/s]

Episode 1651/8000, real env return = -111.22


 21%|███████▋                             | 1661/8000 [27:32<1:29:50,  1.18it/s]

Episode 1661/8000, real env return = -111.84


 21%|███████▋                             | 1671/8000 [27:41<1:28:51,  1.19it/s]

Episode 1671/8000, real env return = -110.58


 21%|███████▊                             | 1681/8000 [27:49<1:29:18,  1.18it/s]

Episode 1681/8000, real env return = -110.52


 21%|███████▊                             | 1691/8000 [27:58<1:31:40,  1.15it/s]

Episode 1691/8000, real env return = -110.22


 21%|███████▊                             | 1701/8000 [28:06<1:27:52,  1.19it/s]

Episode 1701/8000, real env return = -110.85


 21%|███████▉                             | 1711/8000 [28:15<1:28:26,  1.19it/s]

Episode 1711/8000, real env return = -110.53


 22%|███████▉                             | 1721/8000 [28:23<1:28:44,  1.18it/s]

Episode 1721/8000, real env return = -110.15


 22%|████████                             | 1731/8000 [28:32<1:31:06,  1.15it/s]

Episode 1731/8000, real env return = -110.55


 22%|████████                             | 1741/8000 [28:40<1:26:55,  1.20it/s]

Episode 1741/8000, real env return = -109.62


 22%|████████                             | 1751/8000 [28:49<1:27:06,  1.20it/s]

Episode 1751/8000, real env return = -109.54


 22%|████████▏                            | 1761/8000 [28:57<1:32:12,  1.13it/s]

Episode 1761/8000, real env return = -109.59


 22%|████████▏                            | 1771/8000 [29:06<1:29:53,  1.15it/s]

Episode 1771/8000, real env return = -109.39


 22%|████████▏                            | 1781/8000 [29:14<1:25:39,  1.21it/s]

Episode 1781/8000, real env return = -108.58


 22%|████████▎                            | 1791/8000 [29:23<1:26:25,  1.20it/s]

Episode 1791/8000, real env return = -108.63


 23%|████████▎                            | 1801/8000 [29:31<1:29:37,  1.15it/s]

Episode 1801/8000, real env return = -108.50


 23%|████████▍                            | 1811/8000 [29:40<1:31:28,  1.13it/s]

Episode 1811/8000, real env return = -108.26


 23%|████████▍                            | 1821/8000 [29:48<1:29:53,  1.15it/s]

Episode 1821/8000, real env return = -108.63


 23%|████████▍                            | 1831/8000 [29:57<1:25:19,  1.21it/s]

Episode 1831/8000, real env return = -108.63


 23%|████████▌                            | 1841/8000 [30:06<1:29:08,  1.15it/s]

Episode 1841/8000, real env return = -108.81


 23%|████████▌                            | 1851/8000 [30:14<1:25:37,  1.20it/s]

Episode 1851/8000, real env return = -107.76


 23%|████████▌                            | 1861/8000 [30:22<1:25:19,  1.20it/s]

Episode 1861/8000, real env return = -107.72


 23%|████████▋                            | 1871/8000 [30:31<1:25:19,  1.20it/s]

Episode 1871/8000, real env return = -108.37


 24%|████████▋                            | 1881/8000 [30:40<1:26:49,  1.17it/s]

Episode 1881/8000, real env return = -107.90


 24%|████████▋                            | 1891/8000 [30:48<1:30:20,  1.13it/s]

Episode 1891/8000, real env return = -107.53


 24%|████████▊                            | 1901/8000 [30:57<1:30:25,  1.12it/s]

Episode 1901/8000, real env return = -106.89


 24%|████████▊                            | 1911/8000 [31:05<1:29:04,  1.14it/s]

Episode 1911/8000, real env return = -107.74


 24%|████████▉                            | 1921/8000 [31:14<1:27:46,  1.15it/s]

Episode 1921/8000, real env return = -107.67


 24%|████████▉                            | 1931/8000 [31:22<1:24:41,  1.19it/s]

Episode 1931/8000, real env return = -107.75


 24%|████████▉                            | 1941/8000 [31:31<1:31:20,  1.11it/s]

Episode 1941/8000, real env return = -107.50


 24%|█████████                            | 1951/8000 [31:40<1:23:49,  1.20it/s]

Episode 1951/8000, real env return = -108.14


 25%|█████████                            | 1961/8000 [31:48<1:25:39,  1.18it/s]

Episode 1961/8000, real env return = -108.07


 25%|█████████                            | 1971/8000 [31:57<1:27:28,  1.15it/s]

Episode 1971/8000, real env return = -108.17


 25%|█████████▏                           | 1981/8000 [32:05<1:27:14,  1.15it/s]

Episode 1981/8000, real env return = -108.34


 25%|█████████▏                           | 1991/8000 [32:14<1:25:44,  1.17it/s]

Episode 1991/8000, real env return = -108.10


 25%|█████████▎                           | 2001/8000 [32:22<1:28:10,  1.13it/s]

Episode 2001/8000, real env return = -108.13


 25%|█████████▎                           | 2011/8000 [32:31<1:28:35,  1.13it/s]

Episode 2011/8000, real env return = -108.39


 25%|█████████▎                           | 2021/8000 [32:39<1:24:22,  1.18it/s]

Episode 2021/8000, real env return = -108.15


 25%|█████████▍                           | 2031/8000 [32:48<1:28:59,  1.12it/s]

Episode 2031/8000, real env return = -108.49


 26%|█████████▍                           | 2041/8000 [32:57<1:24:13,  1.18it/s]

Episode 2041/8000, real env return = -105.12


 26%|█████████▍                           | 2051/8000 [33:05<1:22:51,  1.20it/s]

Episode 2051/8000, real env return = -107.92


 26%|█████████▌                           | 2061/8000 [33:14<1:29:19,  1.11it/s]

Episode 2061/8000, real env return = -108.16


 26%|█████████▌                           | 2071/8000 [33:22<1:23:29,  1.18it/s]

Episode 2071/8000, real env return = -108.03


 26%|█████████▌                           | 2081/8000 [33:31<1:23:07,  1.19it/s]

Episode 2081/8000, real env return = -107.13


 26%|█████████▋                           | 2091/8000 [33:40<1:23:53,  1.17it/s]

Episode 2091/8000, real env return = -107.07


 26%|█████████▋                           | 2101/8000 [33:48<1:23:44,  1.17it/s]

Episode 2101/8000, real env return = -105.03


 26%|█████████▊                           | 2111/8000 [33:57<1:24:39,  1.16it/s]

Episode 2111/8000, real env return = -105.08


 27%|█████████▊                           | 2121/8000 [34:05<1:26:20,  1.13it/s]

Episode 2121/8000, real env return = -104.95


 27%|█████████▊                           | 2131/8000 [34:14<1:23:17,  1.17it/s]

Episode 2131/8000, real env return = -104.92


 27%|█████████▉                           | 2141/8000 [34:23<1:25:19,  1.14it/s]

Episode 2141/8000, real env return = -104.80


 27%|█████████▉                           | 2151/8000 [34:31<1:23:32,  1.17it/s]

Episode 2151/8000, real env return = -105.10


 27%|█████████▉                           | 2161/8000 [34:40<1:26:05,  1.13it/s]

Episode 2161/8000, real env return = -105.11


 27%|██████████                           | 2171/8000 [34:49<1:25:33,  1.14it/s]

Episode 2171/8000, real env return = -105.10


 27%|██████████                           | 2181/8000 [34:57<1:20:28,  1.21it/s]

Episode 2181/8000, real env return = -104.63


 27%|██████████▏                          | 2191/8000 [35:06<1:21:11,  1.19it/s]

Episode 2191/8000, real env return = -105.18


 28%|██████████▏                          | 2201/8000 [35:14<1:26:03,  1.12it/s]

Episode 2201/8000, real env return = -105.06


 28%|██████████▏                          | 2211/8000 [35:23<1:23:23,  1.16it/s]

Episode 2211/8000, real env return = -104.90


 28%|██████████▎                          | 2221/8000 [35:32<1:25:59,  1.12it/s]

Episode 2221/8000, real env return = -104.87


 28%|██████████▎                          | 2231/8000 [35:40<1:22:36,  1.16it/s]

Episode 2231/8000, real env return = -104.54


 28%|██████████▎                          | 2241/8000 [35:49<1:21:04,  1.18it/s]

Episode 2241/8000, real env return = -105.12


 28%|██████████▍                          | 2251/8000 [35:57<1:23:25,  1.15it/s]

Episode 2251/8000, real env return = -104.83


 28%|██████████▍                          | 2261/8000 [36:06<1:20:48,  1.18it/s]

Episode 2261/8000, real env return = -104.95


 28%|██████████▌                          | 2271/8000 [36:15<1:23:43,  1.14it/s]

Episode 2271/8000, real env return = -105.02


 29%|██████████▌                          | 2281/8000 [36:23<1:21:50,  1.16it/s]

Episode 2281/8000, real env return = -104.91


 29%|██████████▌                          | 2291/8000 [36:32<1:22:05,  1.16it/s]

Episode 2291/8000, real env return = -105.53


 29%|██████████▋                          | 2301/8000 [36:40<1:20:23,  1.18it/s]

Episode 2301/8000, real env return = -105.42


 29%|██████████▋                          | 2311/8000 [36:49<1:18:57,  1.20it/s]

Episode 2311/8000, real env return = -105.23


 29%|██████████▋                          | 2321/8000 [36:57<1:18:32,  1.21it/s]

Episode 2321/8000, real env return = -105.13


 29%|██████████▊                          | 2331/8000 [37:06<1:21:44,  1.16it/s]

Episode 2331/8000, real env return = -104.85


 29%|██████████▊                          | 2341/8000 [37:15<1:20:39,  1.17it/s]

Episode 2341/8000, real env return = -105.42


 29%|██████████▊                          | 2351/8000 [37:23<1:19:25,  1.19it/s]

Episode 2351/8000, real env return = -105.49


 30%|██████████▉                          | 2361/8000 [37:32<1:22:24,  1.14it/s]

Episode 2361/8000, real env return = -105.35


 30%|██████████▉                          | 2371/8000 [37:41<1:22:07,  1.14it/s]

Episode 2371/8000, real env return = -104.90


 30%|███████████                          | 2381/8000 [37:49<1:18:57,  1.19it/s]

Episode 2381/8000, real env return = -105.11


 30%|███████████                          | 2391/8000 [37:57<1:17:36,  1.20it/s]

Episode 2391/8000, real env return = -105.23


 30%|███████████                          | 2401/8000 [38:06<1:21:36,  1.14it/s]

Episode 2401/8000, real env return = -105.24


 30%|███████████▏                         | 2411/8000 [38:15<1:18:54,  1.18it/s]

Episode 2411/8000, real env return = -105.36


 30%|███████████▏                         | 2421/8000 [38:23<1:18:08,  1.19it/s]

Episode 2421/8000, real env return = -105.35


 30%|███████████▏                         | 2431/8000 [38:32<1:17:27,  1.20it/s]

Episode 2431/8000, real env return = -105.69


 31%|███████████▎                         | 2441/8000 [38:40<1:16:30,  1.21it/s]

Episode 2441/8000, real env return = -105.70


 31%|███████████▎                         | 2451/8000 [38:49<1:21:50,  1.13it/s]

Episode 2451/8000, real env return = -105.13


 31%|███████████▍                         | 2461/8000 [38:57<1:17:14,  1.20it/s]

Episode 2461/8000, real env return = -105.09


 31%|███████████▍                         | 2471/8000 [39:06<1:17:45,  1.19it/s]

Episode 2471/8000, real env return = -105.34


 31%|███████████▍                         | 2481/8000 [39:15<1:21:26,  1.13it/s]

Episode 2481/8000, real env return = -104.67


 31%|███████████▌                         | 2491/8000 [39:23<1:18:45,  1.17it/s]

Episode 2491/8000, real env return = -104.82


 31%|███████████▌                         | 2501/8000 [39:32<1:16:41,  1.20it/s]

Episode 2501/8000, real env return = -104.73


 31%|███████████▌                         | 2511/8000 [39:41<1:21:37,  1.12it/s]

Episode 2511/8000, real env return = -104.58


 32%|███████████▋                         | 2521/8000 [39:49<1:17:11,  1.18it/s]

Episode 2521/8000, real env return = -107.24


 32%|███████████▋                         | 2531/8000 [39:58<1:19:18,  1.15it/s]

Episode 2531/8000, real env return = -107.72


 32%|███████████▊                         | 2541/8000 [40:07<1:19:41,  1.14it/s]

Episode 2541/8000, real env return = -107.85


 32%|███████████▊                         | 2551/8000 [40:15<1:20:59,  1.12it/s]

Episode 2551/8000, real env return = -105.71


 32%|███████████▊                         | 2561/8000 [40:24<1:19:59,  1.13it/s]

Episode 2561/8000, real env return = -105.40


 32%|███████████▉                         | 2571/8000 [40:32<1:16:42,  1.18it/s]

Episode 2571/8000, real env return = -105.40


 32%|███████████▉                         | 2581/8000 [40:41<1:20:26,  1.12it/s]

Episode 2581/8000, real env return = -105.23


 32%|███████████▉                         | 2591/8000 [40:50<1:16:45,  1.17it/s]

Episode 2591/8000, real env return = -104.53


 33%|████████████                         | 2601/8000 [40:58<1:16:58,  1.17it/s]

Episode 2601/8000, real env return = -105.29


 33%|████████████                         | 2611/8000 [41:07<1:16:51,  1.17it/s]

Episode 2611/8000, real env return = -107.34


 33%|████████████                         | 2621/8000 [41:16<1:15:42,  1.18it/s]

Episode 2621/8000, real env return = -106.31


 33%|████████████▏                        | 2631/8000 [41:24<1:19:19,  1.13it/s]

Episode 2631/8000, real env return = -105.70


 33%|████████████▏                        | 2641/8000 [41:33<1:19:04,  1.13it/s]

Episode 2641/8000, real env return = -106.59


 33%|████████████▎                        | 2651/8000 [41:42<1:16:03,  1.17it/s]

Episode 2651/8000, real env return = -105.43


 33%|████████████▎                        | 2661/8000 [41:51<1:18:18,  1.14it/s]

Episode 2661/8000, real env return = -105.39


 33%|████████████▎                        | 2671/8000 [41:59<1:16:46,  1.16it/s]

Episode 2671/8000, real env return = -105.31


 34%|████████████▍                        | 2681/8000 [42:08<1:17:04,  1.15it/s]

Episode 2681/8000, real env return = -105.27


 34%|████████████▍                        | 2691/8000 [42:16<1:16:48,  1.15it/s]

Episode 2691/8000, real env return = -105.38


 34%|████████████▍                        | 2701/8000 [42:25<1:14:01,  1.19it/s]

Episode 2701/8000, real env return = -104.34


 34%|████████████▌                        | 2711/8000 [42:33<1:15:57,  1.16it/s]

Episode 2711/8000, real env return = -105.13


 34%|████████████▌                        | 2721/8000 [42:42<1:16:49,  1.15it/s]

Episode 2721/8000, real env return = -104.94


 34%|████████████▋                        | 2731/8000 [42:51<1:16:09,  1.15it/s]

Episode 2731/8000, real env return = -107.54


 34%|████████████▋                        | 2741/8000 [43:00<1:16:41,  1.14it/s]

Episode 2741/8000, real env return = -105.04


 34%|████████████▋                        | 2751/8000 [43:08<1:14:07,  1.18it/s]

Episode 2751/8000, real env return = -105.34


 35%|████████████▊                        | 2761/8000 [43:17<1:16:47,  1.14it/s]

Episode 2761/8000, real env return = -108.27


 35%|████████████▊                        | 2771/8000 [43:26<1:14:00,  1.18it/s]

Episode 2771/8000, real env return = -108.73


 35%|████████████▊                        | 2781/8000 [43:34<1:18:14,  1.11it/s]

Episode 2781/8000, real env return = -108.41


 35%|████████████▉                        | 2791/8000 [43:43<1:13:32,  1.18it/s]

Episode 2791/8000, real env return = -107.91


 35%|████████████▉                        | 2801/8000 [43:52<1:20:21,  1.08it/s]

Episode 2801/8000, real env return = -105.69


 35%|█████████████                        | 2811/8000 [44:01<1:15:40,  1.14it/s]

Episode 2811/8000, real env return = -105.74


 35%|█████████████                        | 2821/8000 [44:10<1:18:50,  1.09it/s]

Episode 2821/8000, real env return = -105.17


 35%|█████████████                        | 2831/8000 [44:19<1:17:31,  1.11it/s]

Episode 2831/8000, real env return = -105.58


 36%|█████████████▏                       | 2841/8000 [44:28<1:17:43,  1.11it/s]

Episode 2841/8000, real env return = -105.71


 36%|█████████████▏                       | 2851/8000 [44:37<1:15:02,  1.14it/s]

Episode 2851/8000, real env return = -105.52


 36%|█████████████▏                       | 2861/8000 [44:45<1:13:07,  1.17it/s]

Episode 2861/8000, real env return = -105.51


 36%|█████████████▎                       | 2871/8000 [44:54<1:14:41,  1.14it/s]

Episode 2871/8000, real env return = -105.06


 36%|█████████████▎                       | 2881/8000 [45:02<1:12:29,  1.18it/s]

Episode 2881/8000, real env return = -105.41


 36%|█████████████▎                       | 2891/8000 [45:12<1:22:33,  1.03it/s]

Episode 2891/8000, real env return = -105.12


 36%|█████████████▍                       | 2901/8000 [45:20<1:16:45,  1.11it/s]

Episode 2901/8000, real env return = -105.30


 36%|█████████████▍                       | 2911/8000 [45:29<1:16:11,  1.11it/s]

Episode 2911/8000, real env return = -104.88


 37%|█████████████▌                       | 2921/8000 [45:38<1:12:11,  1.17it/s]

Episode 2921/8000, real env return = -105.33


 37%|█████████████▌                       | 2931/8000 [45:47<1:18:51,  1.07it/s]

Episode 2931/8000, real env return = -104.91


 37%|█████████████▌                       | 2941/8000 [45:57<1:18:37,  1.07it/s]

Episode 2941/8000, real env return = -104.87


 37%|█████████████▋                       | 2951/8000 [46:06<1:16:50,  1.10it/s]

Episode 2951/8000, real env return = -107.90


 37%|█████████████▋                       | 2961/8000 [46:15<1:13:54,  1.14it/s]

Episode 2961/8000, real env return = -106.28


 37%|█████████████▋                       | 2971/8000 [46:24<1:16:35,  1.09it/s]

Episode 2971/8000, real env return = -108.10


 37%|█████████████▊                       | 2981/8000 [46:33<1:13:33,  1.14it/s]

Episode 2981/8000, real env return = -105.00


 37%|█████████████▊                       | 2991/8000 [46:42<1:15:44,  1.10it/s]

Episode 2991/8000, real env return = -105.04


 38%|█████████████▉                       | 3001/8000 [46:52<1:15:26,  1.10it/s]

Episode 3001/8000, real env return = -105.22


 38%|█████████████▉                       | 3011/8000 [47:01<1:17:01,  1.08it/s]

Episode 3011/8000, real env return = -104.78


 38%|█████████████▉                       | 3021/8000 [47:11<1:17:24,  1.07it/s]

Episode 3021/8000, real env return = -105.09


 38%|██████████████                       | 3031/8000 [47:20<1:15:52,  1.09it/s]

Episode 3031/8000, real env return = -104.91


 38%|██████████████                       | 3041/8000 [47:29<1:14:38,  1.11it/s]

Episode 3041/8000, real env return = -105.04


 38%|██████████████                       | 3051/8000 [47:39<1:15:05,  1.10it/s]

Episode 3051/8000, real env return = -105.21


 38%|██████████████▏                      | 3061/8000 [47:48<1:14:43,  1.10it/s]

Episode 3061/8000, real env return = -105.15


 38%|██████████████▏                      | 3071/8000 [47:58<1:19:43,  1.03it/s]

Episode 3071/8000, real env return = -105.38


 39%|██████████████▏                      | 3081/8000 [48:07<1:18:29,  1.04it/s]

Episode 3081/8000, real env return = -105.19


 39%|██████████████▎                      | 3091/8000 [48:16<1:15:49,  1.08it/s]

Episode 3091/8000, real env return = -104.23


 39%|██████████████▎                      | 3101/8000 [48:25<1:14:44,  1.09it/s]

Episode 3101/8000, real env return = -105.24


 39%|██████████████▍                      | 3111/8000 [48:35<1:15:35,  1.08it/s]

Episode 3111/8000, real env return = -105.55


 39%|██████████████▍                      | 3121/8000 [48:44<1:12:47,  1.12it/s]

Episode 3121/8000, real env return = -105.16


 39%|██████████████▍                      | 3131/8000 [48:53<1:19:35,  1.02it/s]

Episode 3131/8000, real env return = -105.38


 39%|██████████████▌                      | 3141/8000 [49:03<1:15:34,  1.07it/s]

Episode 3141/8000, real env return = -105.19


 39%|██████████████▌                      | 3151/8000 [49:12<1:16:14,  1.06it/s]

Episode 3151/8000, real env return = -105.37


 40%|██████████████▌                      | 3161/8000 [49:21<1:13:28,  1.10it/s]

Episode 3161/8000, real env return = -105.18


 40%|██████████████▋                      | 3171/8000 [49:30<1:13:19,  1.10it/s]

Episode 3171/8000, real env return = -105.17


 40%|██████████████▋                      | 3181/8000 [49:40<1:12:07,  1.11it/s]

Episode 3181/8000, real env return = -105.27


 40%|██████████████▊                      | 3191/8000 [49:49<1:16:33,  1.05it/s]

Episode 3191/8000, real env return = -105.38


 40%|██████████████▊                      | 3201/8000 [49:58<1:16:33,  1.04it/s]

Episode 3201/8000, real env return = -105.34


 40%|██████████████▊                      | 3211/8000 [50:07<1:13:16,  1.09it/s]

Episode 3211/8000, real env return = -105.47


 40%|██████████████▉                      | 3221/8000 [50:16<1:13:48,  1.08it/s]

Episode 3221/8000, real env return = -105.60


 40%|██████████████▉                      | 3231/8000 [50:26<1:11:24,  1.11it/s]

Episode 3231/8000, real env return = -105.52


 41%|██████████████▉                      | 3241/8000 [50:35<1:12:18,  1.10it/s]

Episode 3241/8000, real env return = -105.46


 41%|███████████████                      | 3251/8000 [50:44<1:11:15,  1.11it/s]

Episode 3251/8000, real env return = -105.86


 41%|███████████████                      | 3261/8000 [50:54<1:16:51,  1.03it/s]

Episode 3261/8000, real env return = -105.05


 41%|███████████████▏                     | 3271/8000 [51:03<1:13:36,  1.07it/s]

Episode 3271/8000, real env return = -105.59


 41%|███████████████▏                     | 3281/8000 [51:12<1:11:45,  1.10it/s]

Episode 3281/8000, real env return = -105.22


 41%|███████████████▏                     | 3291/8000 [51:21<1:11:46,  1.09it/s]

Episode 3291/8000, real env return = -105.22


 41%|███████████████▎                     | 3301/8000 [51:31<1:11:19,  1.10it/s]

Episode 3301/8000, real env return = -105.32


 41%|███████████████▎                     | 3311/8000 [51:40<1:11:11,  1.10it/s]

Episode 3311/8000, real env return = -105.61


 42%|███████████████▎                     | 3321/8000 [51:49<1:16:50,  1.01it/s]

Episode 3321/8000, real env return = -105.45


 42%|███████████████▍                     | 3331/8000 [51:59<1:13:26,  1.06it/s]

Episode 3331/8000, real env return = -105.29


 42%|███████████████▍                     | 3341/8000 [52:08<1:10:27,  1.10it/s]

Episode 3341/8000, real env return = -105.04


 42%|███████████████▍                     | 3351/8000 [52:17<1:10:22,  1.10it/s]

Episode 3351/8000, real env return = -105.49


 42%|███████████████▌                     | 3361/8000 [52:26<1:10:11,  1.10it/s]

Episode 3361/8000, real env return = -105.38


 42%|███████████████▌                     | 3371/8000 [52:36<1:11:22,  1.08it/s]

Episode 3371/8000, real env return = -105.40


 42%|███████████████▋                     | 3381/8000 [52:45<1:10:44,  1.09it/s]

Episode 3381/8000, real env return = -105.57


 42%|███████████████▋                     | 3391/8000 [52:54<1:10:13,  1.09it/s]

Episode 3391/8000, real env return = -105.82


 43%|███████████████▋                     | 3401/8000 [53:03<1:09:28,  1.10it/s]

Episode 3401/8000, real env return = -105.53


 43%|███████████████▊                     | 3411/8000 [53:13<1:09:53,  1.09it/s]

Episode 3411/8000, real env return = -105.11


 43%|███████████████▊                     | 3421/8000 [53:22<1:12:47,  1.05it/s]

Episode 3421/8000, real env return = -105.30


 43%|███████████████▊                     | 3431/8000 [53:31<1:09:25,  1.10it/s]

Episode 3431/8000, real env return = -105.44


 43%|███████████████▉                     | 3441/8000 [53:40<1:08:58,  1.10it/s]

Episode 3441/8000, real env return = -104.95


 43%|███████████████▉                     | 3451/8000 [53:50<1:14:09,  1.02it/s]

Episode 3451/8000, real env return = -104.97


 43%|████████████████                     | 3461/8000 [53:59<1:11:21,  1.06it/s]

Episode 3461/8000, real env return = -104.91


 43%|████████████████                     | 3471/8000 [54:08<1:05:43,  1.15it/s]

Episode 3471/8000, real env return = -105.32


 44%|████████████████                     | 3481/8000 [54:17<1:04:36,  1.17it/s]

Episode 3481/8000, real env return = -104.72


 44%|████████████████▏                    | 3491/8000 [54:25<1:05:34,  1.15it/s]

Episode 3491/8000, real env return = -105.19


 44%|████████████████▏                    | 3501/8000 [54:34<1:06:00,  1.14it/s]

Episode 3501/8000, real env return = -105.08


 44%|████████████████▏                    | 3511/8000 [54:43<1:06:10,  1.13it/s]

Episode 3511/8000, real env return = -105.68


 44%|████████████████▎                    | 3521/8000 [54:52<1:05:58,  1.13it/s]

Episode 3521/8000, real env return = -105.46


 44%|████████████████▎                    | 3531/8000 [55:00<1:04:29,  1.15it/s]

Episode 3531/8000, real env return = -105.55


 44%|████████████████▍                    | 3541/8000 [55:09<1:05:34,  1.13it/s]

Episode 3541/8000, real env return = -105.05


 44%|████████████████▍                    | 3551/8000 [55:18<1:02:30,  1.19it/s]

Episode 3551/8000, real env return = -105.26


 45%|████████████████▍                    | 3561/8000 [55:27<1:03:38,  1.16it/s]

Episode 3561/8000, real env return = -105.34


 45%|████████████████▌                    | 3571/8000 [55:35<1:04:12,  1.15it/s]

Episode 3571/8000, real env return = -105.85


 45%|████████████████▌                    | 3581/8000 [55:44<1:03:35,  1.16it/s]

Episode 3581/8000, real env return = -105.61


 45%|████████████████▌                    | 3591/8000 [55:53<1:05:35,  1.12it/s]

Episode 3591/8000, real env return = -105.69


 45%|████████████████▋                    | 3601/8000 [56:01<1:03:35,  1.15it/s]

Episode 3601/8000, real env return = -105.54


 45%|████████████████▋                    | 3611/8000 [56:10<1:02:58,  1.16it/s]

Episode 3611/8000, real env return = -105.29


 45%|████████████████▋                    | 3621/8000 [56:19<1:04:18,  1.13it/s]

Episode 3621/8000, real env return = -105.20


 45%|████████████████▊                    | 3631/8000 [56:28<1:04:19,  1.13it/s]

Episode 3631/8000, real env return = -104.61


 46%|████████████████▊                    | 3641/8000 [56:36<1:03:14,  1.15it/s]

Episode 3641/8000, real env return = -104.73


 46%|████████████████▉                    | 3651/8000 [56:45<1:03:24,  1.14it/s]

Episode 3651/8000, real env return = -104.92


 46%|████████████████▉                    | 3661/8000 [56:54<1:02:23,  1.16it/s]

Episode 3661/8000, real env return = -105.37


 46%|████████████████▉                    | 3671/8000 [57:02<1:01:27,  1.17it/s]

Episode 3671/8000, real env return = -105.58


 46%|█████████████████                    | 3681/8000 [57:11<1:03:41,  1.13it/s]

Episode 3681/8000, real env return = -105.37


 46%|█████████████████                    | 3691/8000 [57:20<1:02:22,  1.15it/s]

Episode 3691/8000, real env return = -105.29


 46%|█████████████████                    | 3701/8000 [57:29<1:02:07,  1.15it/s]

Episode 3701/8000, real env return = -105.53


 46%|█████████████████▏                   | 3711/8000 [57:37<1:01:58,  1.15it/s]

Episode 3711/8000, real env return = -105.31


 47%|█████████████████▏                   | 3721/8000 [57:46<1:02:29,  1.14it/s]

Episode 3721/8000, real env return = -105.32


 47%|█████████████████▎                   | 3731/8000 [57:55<1:01:30,  1.16it/s]

Episode 3731/8000, real env return = -105.35


 47%|█████████████████▎                   | 3741/8000 [58:03<1:00:04,  1.18it/s]

Episode 3741/8000, real env return = -105.14


 47%|█████████████████▎                   | 3751/8000 [58:12<1:00:02,  1.18it/s]

Episode 3751/8000, real env return = -105.82


 47%|█████████████████▍                   | 3761/8000 [58:21<1:01:12,  1.15it/s]

Episode 3761/8000, real env return = -106.12


 47%|█████████████████▍                   | 3771/8000 [58:29<1:00:29,  1.17it/s]

Episode 3771/8000, real env return = -105.69


 47%|█████████████████▍                   | 3781/8000 [58:38<1:01:50,  1.14it/s]

Episode 3781/8000, real env return = -105.84


 47%|█████████████████▌                   | 3791/8000 [58:47<1:00:10,  1.17it/s]

Episode 3791/8000, real env return = -103.88


 48%|█████████████████▌                   | 3801/8000 [58:56<1:01:24,  1.14it/s]

Episode 3801/8000, real env return = -105.59


 48%|█████████████████▋                   | 3811/8000 [59:04<1:00:32,  1.15it/s]

Episode 3811/8000, real env return = -106.28


 48%|█████████████████▋                   | 3821/8000 [59:13<1:00:56,  1.14it/s]

Episode 3821/8000, real env return = -105.98


 48%|█████████████████▋                   | 3831/8000 [59:22<1:01:23,  1.13it/s]

Episode 3831/8000, real env return = -105.63


 48%|█████████████████▊                   | 3841/8000 [59:31<1:01:06,  1.13it/s]

Episode 3841/8000, real env return = -105.46


 48%|█████████████████▊                   | 3851/8000 [59:40<1:01:50,  1.12it/s]

Episode 3851/8000, real env return = -106.09


 48%|█████████████████▊                   | 3861/8000 [59:48<1:00:00,  1.15it/s]

Episode 3861/8000, real env return = -106.18


 48%|█████████████████▉                   | 3871/8000 [59:57<1:01:00,  1.13it/s]

Episode 3871/8000, real env return = -105.62


 49%|█████████████████▉                   | 3881/8000 [1:00:06<58:56,  1.16it/s]

Episode 3881/8000, real env return = -104.78


 49%|█████████████████                  | 3891/8000 [1:00:14<1:00:29,  1.13it/s]

Episode 3891/8000, real env return = -105.83


 49%|██████████████████                   | 3901/8000 [1:00:23<59:30,  1.15it/s]

Episode 3901/8000, real env return = -106.14


 49%|█████████████████                  | 3911/8000 [1:00:32<1:00:22,  1.13it/s]

Episode 3911/8000, real env return = -105.68


 49%|██████████████████▏                  | 3921/8000 [1:00:41<58:37,  1.16it/s]

Episode 3921/8000, real env return = -105.83


 49%|██████████████████▏                  | 3931/8000 [1:00:49<59:15,  1.14it/s]

Episode 3931/8000, real env return = -105.86


 49%|██████████████████▏                  | 3941/8000 [1:00:58<59:31,  1.14it/s]

Episode 3941/8000, real env return = -106.39


 49%|██████████████████▎                  | 3951/8000 [1:01:07<59:19,  1.14it/s]

Episode 3951/8000, real env return = -106.41


 50%|██████████████████▎                  | 3961/8000 [1:01:16<57:08,  1.18it/s]

Episode 3961/8000, real env return = -106.65


 50%|██████████████████▎                  | 3971/8000 [1:01:24<58:21,  1.15it/s]

Episode 3971/8000, real env return = -106.23


 50%|██████████████████▍                  | 3981/8000 [1:01:33<58:55,  1.14it/s]

Episode 3981/8000, real env return = -106.09


 50%|██████████████████▍                  | 3991/8000 [1:01:42<56:34,  1.18it/s]

Episode 3991/8000, real env return = -106.29


 50%|██████████████████▌                  | 4001/8000 [1:01:50<57:29,  1.16it/s]

Episode 4001/8000, real env return = -106.21


 50%|██████████████████▌                  | 4011/8000 [1:01:59<58:14,  1.14it/s]

Episode 4011/8000, real env return = -106.17


 50%|██████████████████▌                  | 4021/8000 [1:02:08<58:13,  1.14it/s]

Episode 4021/8000, real env return = -105.41


 50%|██████████████████▋                  | 4031/8000 [1:02:17<57:49,  1.14it/s]

Episode 4031/8000, real env return = -106.36


 51%|██████████████████▋                  | 4041/8000 [1:02:25<57:14,  1.15it/s]

Episode 4041/8000, real env return = -106.65


 51%|██████████████████▋                  | 4051/8000 [1:02:34<57:16,  1.15it/s]

Episode 4051/8000, real env return = -106.15


 51%|██████████████████▊                  | 4061/8000 [1:02:43<58:33,  1.12it/s]

Episode 4061/8000, real env return = -105.92


 51%|██████████████████▊                  | 4071/8000 [1:02:51<54:31,  1.20it/s]

Episode 4071/8000, real env return = -106.48


 51%|██████████████████▊                  | 4081/8000 [1:03:00<55:36,  1.17it/s]

Episode 4081/8000, real env return = -105.37


 51%|██████████████████▉                  | 4091/8000 [1:03:09<55:52,  1.17it/s]

Episode 4091/8000, real env return = -105.62


 51%|██████████████████▉                  | 4101/8000 [1:03:18<57:41,  1.13it/s]

Episode 4101/8000, real env return = -106.10


 51%|███████████████████                  | 4111/8000 [1:03:26<55:28,  1.17it/s]

Episode 4111/8000, real env return = -106.79


 52%|███████████████████                  | 4121/8000 [1:03:35<56:36,  1.14it/s]

Episode 4121/8000, real env return = -105.48


 52%|███████████████████                  | 4131/8000 [1:03:44<56:20,  1.14it/s]

Episode 4131/8000, real env return = -105.86


 52%|███████████████████▏                 | 4141/8000 [1:03:53<55:42,  1.15it/s]

Episode 4141/8000, real env return = -105.87


 52%|███████████████████▏                 | 4151/8000 [1:04:01<55:25,  1.16it/s]

Episode 4151/8000, real env return = -106.10


 52%|███████████████████▏                 | 4161/8000 [1:04:10<54:56,  1.16it/s]

Episode 4161/8000, real env return = -105.92


 52%|███████████████████▎                 | 4171/8000 [1:04:19<56:37,  1.13it/s]

Episode 4171/8000, real env return = -105.47


 52%|███████████████████▎                 | 4181/8000 [1:04:28<54:51,  1.16it/s]

Episode 4181/8000, real env return = -105.62


 52%|███████████████████▍                 | 4191/8000 [1:04:36<56:37,  1.12it/s]

Episode 4191/8000, real env return = -104.73


 53%|███████████████████▍                 | 4201/8000 [1:04:45<55:57,  1.13it/s]

Episode 4201/8000, real env return = -105.84


 53%|███████████████████▍                 | 4211/8000 [1:04:54<53:46,  1.17it/s]

Episode 4211/8000, real env return = -106.14


 53%|███████████████████▌                 | 4221/8000 [1:05:02<53:32,  1.18it/s]

Episode 4221/8000, real env return = -106.17


 53%|███████████████████▌                 | 4231/8000 [1:05:11<55:31,  1.13it/s]

Episode 4231/8000, real env return = -106.46


 53%|███████████████████▌                 | 4241/8000 [1:05:20<54:52,  1.14it/s]

Episode 4241/8000, real env return = -105.10


 53%|███████████████████▋                 | 4251/8000 [1:05:28<52:53,  1.18it/s]

Episode 4251/8000, real env return = -107.38


 53%|███████████████████▋                 | 4261/8000 [1:05:37<53:32,  1.16it/s]

Episode 4261/8000, real env return = -106.33


 53%|███████████████████▊                 | 4271/8000 [1:05:46<54:34,  1.14it/s]

Episode 4271/8000, real env return = -106.69


 54%|███████████████████▊                 | 4281/8000 [1:05:55<54:12,  1.14it/s]

Episode 4281/8000, real env return = -106.41


 54%|███████████████████▊                 | 4291/8000 [1:06:04<54:07,  1.14it/s]

Episode 4291/8000, real env return = -107.21


 54%|███████████████████▉                 | 4301/8000 [1:06:12<53:54,  1.14it/s]

Episode 4301/8000, real env return = -105.73


 54%|███████████████████▉                 | 4311/8000 [1:06:21<53:00,  1.16it/s]

Episode 4311/8000, real env return = -106.69


 54%|███████████████████▉                 | 4321/8000 [1:06:30<53:46,  1.14it/s]

Episode 4321/8000, real env return = -106.27


 54%|████████████████████                 | 4331/8000 [1:06:38<52:37,  1.16it/s]

Episode 4331/8000, real env return = -106.87


 54%|████████████████████                 | 4341/8000 [1:06:47<53:53,  1.13it/s]

Episode 4341/8000, real env return = -106.36


 54%|████████████████████                 | 4351/8000 [1:06:56<53:24,  1.14it/s]

Episode 4351/8000, real env return = -107.39


 55%|████████████████████▏                | 4361/8000 [1:07:05<52:54,  1.15it/s]

Episode 4361/8000, real env return = -106.96


 55%|████████████████████▏                | 4371/8000 [1:07:13<52:28,  1.15it/s]

Episode 4371/8000, real env return = -106.10


 55%|████████████████████▎                | 4381/8000 [1:07:22<52:09,  1.16it/s]

Episode 4381/8000, real env return = -106.07


 55%|████████████████████▎                | 4391/8000 [1:07:31<52:00,  1.16it/s]

Episode 4391/8000, real env return = -105.52


 55%|████████████████████▎                | 4401/8000 [1:07:40<52:56,  1.13it/s]

Episode 4401/8000, real env return = -106.21


 55%|████████████████████▍                | 4411/8000 [1:07:48<51:46,  1.16it/s]

Episode 4411/8000, real env return = -106.64


 55%|████████████████████▍                | 4421/8000 [1:07:57<51:59,  1.15it/s]

Episode 4421/8000, real env return = -106.60


 55%|████████████████████▍                | 4431/8000 [1:08:06<51:58,  1.14it/s]

Episode 4431/8000, real env return = -106.39


 56%|████████████████████▌                | 4441/8000 [1:08:14<52:04,  1.14it/s]

Episode 4441/8000, real env return = -106.69


 56%|████████████████████▌                | 4451/8000 [1:08:23<52:06,  1.14it/s]

Episode 4451/8000, real env return = -105.86


 56%|████████████████████▋                | 4461/8000 [1:08:32<50:35,  1.17it/s]

Episode 4461/8000, real env return = -106.63


 56%|████████████████████▋                | 4471/8000 [1:08:40<52:09,  1.13it/s]

Episode 4471/8000, real env return = -106.63


 56%|████████████████████▋                | 4481/8000 [1:08:49<50:08,  1.17it/s]

Episode 4481/8000, real env return = -106.43


 56%|████████████████████▊                | 4491/8000 [1:08:58<50:40,  1.15it/s]

Episode 4491/8000, real env return = -106.41


 56%|████████████████████▊                | 4501/8000 [1:09:07<49:34,  1.18it/s]

Episode 4501/8000, real env return = -105.91


 56%|████████████████████▊                | 4511/8000 [1:09:15<51:01,  1.14it/s]

Episode 4511/8000, real env return = -106.20


 57%|████████████████████▉                | 4521/8000 [1:09:24<51:05,  1.13it/s]

Episode 4521/8000, real env return = -106.13


 57%|████████████████████▉                | 4531/8000 [1:09:33<50:23,  1.15it/s]

Episode 4531/8000, real env return = -105.92


 57%|█████████████████████                | 4541/8000 [1:09:41<50:43,  1.14it/s]

Episode 4541/8000, real env return = -105.78


 57%|█████████████████████                | 4551/8000 [1:09:50<50:27,  1.14it/s]

Episode 4551/8000, real env return = -105.39


 57%|█████████████████████                | 4561/8000 [1:09:59<50:00,  1.15it/s]

Episode 4561/8000, real env return = -105.58


 57%|█████████████████████▏               | 4571/8000 [1:10:08<49:29,  1.15it/s]

Episode 4571/8000, real env return = -105.73


 57%|█████████████████████▏               | 4581/8000 [1:10:16<49:26,  1.15it/s]

Episode 4581/8000, real env return = -105.67


 57%|█████████████████████▏               | 4591/8000 [1:10:25<48:49,  1.16it/s]

Episode 4591/8000, real env return = -105.76


 58%|█████████████████████▎               | 4601/8000 [1:10:34<50:36,  1.12it/s]

Episode 4601/8000, real env return = -106.23


 58%|█████████████████████▎               | 4611/8000 [1:10:43<49:14,  1.15it/s]

Episode 4611/8000, real env return = -105.88


 58%|█████████████████████▎               | 4621/8000 [1:10:51<49:07,  1.15it/s]

Episode 4621/8000, real env return = -105.91


 58%|█████████████████████▍               | 4631/8000 [1:11:00<48:43,  1.15it/s]

Episode 4631/8000, real env return = -105.43


 58%|█████████████████████▍               | 4641/8000 [1:11:09<48:51,  1.15it/s]

Episode 4641/8000, real env return = -105.96


 58%|█████████████████████▌               | 4651/8000 [1:11:17<47:41,  1.17it/s]

Episode 4651/8000, real env return = -106.93


 58%|█████████████████████▌               | 4661/8000 [1:11:26<48:01,  1.16it/s]

Episode 4661/8000, real env return = -105.52


 58%|█████████████████████▌               | 4671/8000 [1:11:35<47:47,  1.16it/s]

Episode 4671/8000, real env return = -106.19


 59%|█████████████████████▋               | 4681/8000 [1:11:44<49:09,  1.13it/s]

Episode 4681/8000, real env return = -106.29


 59%|█████████████████████▋               | 4691/8000 [1:11:52<47:08,  1.17it/s]

Episode 4691/8000, real env return = -106.31


 59%|█████████████████████▋               | 4701/8000 [1:12:01<48:40,  1.13it/s]

Episode 4701/8000, real env return = -105.57


 59%|█████████████████████▊               | 4711/8000 [1:12:10<47:02,  1.17it/s]

Episode 4711/8000, real env return = -106.70


 59%|█████████████████████▊               | 4721/8000 [1:12:19<48:15,  1.13it/s]

Episode 4721/8000, real env return = -106.49


 59%|█████████████████████▉               | 4731/8000 [1:12:27<47:51,  1.14it/s]

Episode 4731/8000, real env return = -106.13


 59%|█████████████████████▉               | 4741/8000 [1:12:36<45:56,  1.18it/s]

Episode 4741/8000, real env return = -106.19


 59%|█████████████████████▉               | 4751/8000 [1:12:45<47:23,  1.14it/s]

Episode 4751/8000, real env return = -106.26


 60%|██████████████████████               | 4761/8000 [1:12:53<46:33,  1.16it/s]

Episode 4761/8000, real env return = -106.77


 60%|██████████████████████               | 4771/8000 [1:13:02<47:37,  1.13it/s]

Episode 4771/8000, real env return = -106.43


 60%|██████████████████████               | 4781/8000 [1:13:11<47:08,  1.14it/s]

Episode 4781/8000, real env return = -105.70


 60%|██████████████████████▏              | 4791/8000 [1:13:20<46:27,  1.15it/s]

Episode 4791/8000, real env return = -106.48


 60%|██████████████████████▏              | 4801/8000 [1:13:28<48:07,  1.11it/s]

Episode 4801/8000, real env return = -106.62


 60%|██████████████████████▎              | 4811/8000 [1:13:37<46:51,  1.13it/s]

Episode 4811/8000, real env return = -106.35


 60%|██████████████████████▎              | 4821/8000 [1:13:46<46:18,  1.14it/s]

Episode 4821/8000, real env return = -106.43


 60%|██████████████████████▎              | 4831/8000 [1:13:55<46:19,  1.14it/s]

Episode 4831/8000, real env return = -106.64


 61%|██████████████████████▍              | 4841/8000 [1:14:03<45:06,  1.17it/s]

Episode 4841/8000, real env return = -106.45


 61%|██████████████████████▍              | 4851/8000 [1:14:12<46:04,  1.14it/s]

Episode 4851/8000, real env return = -107.26


 61%|██████████████████████▍              | 4861/8000 [1:14:21<45:19,  1.15it/s]

Episode 4861/8000, real env return = -106.66


 61%|██████████████████████▌              | 4871/8000 [1:14:29<45:46,  1.14it/s]

Episode 4871/8000, real env return = -107.40


 61%|██████████████████████▌              | 4881/8000 [1:14:38<45:55,  1.13it/s]

Episode 4881/8000, real env return = -106.89


 61%|██████████████████████▌              | 4891/8000 [1:14:47<45:31,  1.14it/s]

Episode 4891/8000, real env return = -106.90


 61%|██████████████████████▋              | 4901/8000 [1:14:56<45:35,  1.13it/s]

Episode 4901/8000, real env return = -106.98


 61%|██████████████████████▋              | 4911/8000 [1:15:05<45:19,  1.14it/s]

Episode 4911/8000, real env return = -106.78


 62%|██████████████████████▊              | 4921/8000 [1:15:13<44:39,  1.15it/s]

Episode 4921/8000, real env return = -106.95


 62%|██████████████████████▊              | 4931/8000 [1:15:22<44:58,  1.14it/s]

Episode 4931/8000, real env return = -106.92


 62%|██████████████████████▊              | 4941/8000 [1:15:31<44:16,  1.15it/s]

Episode 4941/8000, real env return = -106.82


 62%|██████████████████████▉              | 4951/8000 [1:15:39<44:29,  1.14it/s]

Episode 4951/8000, real env return = -106.98


 62%|██████████████████████▉              | 4961/8000 [1:15:48<44:16,  1.14it/s]

Episode 4961/8000, real env return = -106.57


 62%|██████████████████████▉              | 4971/8000 [1:15:57<44:29,  1.13it/s]

Episode 4971/8000, real env return = -106.86


 62%|███████████████████████              | 4981/8000 [1:16:05<44:03,  1.14it/s]

Episode 4981/8000, real env return = -106.87


 62%|███████████████████████              | 4991/8000 [1:16:14<42:54,  1.17it/s]

Episode 4991/8000, real env return = -106.30


 63%|███████████████████████▏             | 5001/8000 [1:16:23<43:56,  1.14it/s]

Episode 5001/8000, real env return = -106.44


 63%|███████████████████████▏             | 5011/8000 [1:16:32<43:05,  1.16it/s]

Episode 5011/8000, real env return = -106.91


 63%|███████████████████████▏             | 5021/8000 [1:16:40<43:56,  1.13it/s]

Episode 5021/8000, real env return = -106.73


 63%|███████████████████████▎             | 5031/8000 [1:16:49<43:06,  1.15it/s]

Episode 5031/8000, real env return = -106.49


 63%|███████████████████████▎             | 5041/8000 [1:16:58<43:34,  1.13it/s]

Episode 5041/8000, real env return = -106.19


 63%|███████████████████████▎             | 5051/8000 [1:17:07<43:00,  1.14it/s]

Episode 5051/8000, real env return = -105.96


 63%|███████████████████████▍             | 5061/8000 [1:17:15<42:04,  1.16it/s]

Episode 5061/8000, real env return = -106.47


 63%|███████████████████████▍             | 5071/8000 [1:17:24<44:02,  1.11it/s]

Episode 5071/8000, real env return = -106.64


 64%|███████████████████████▍             | 5081/8000 [1:17:33<41:33,  1.17it/s]

Episode 5081/8000, real env return = -105.90


 64%|███████████████████████▌             | 5091/8000 [1:17:42<41:40,  1.16it/s]

Episode 5091/8000, real env return = -106.68


 64%|███████████████████████▌             | 5101/8000 [1:17:50<42:18,  1.14it/s]

Episode 5101/8000, real env return = -106.68


 64%|███████████████████████▋             | 5111/8000 [1:17:59<42:06,  1.14it/s]

Episode 5111/8000, real env return = -106.19


 64%|███████████████████████▋             | 5121/8000 [1:18:08<41:26,  1.16it/s]

Episode 5121/8000, real env return = -105.78


 64%|███████████████████████▋             | 5131/8000 [1:18:16<41:26,  1.15it/s]

Episode 5131/8000, real env return = -106.44


 64%|███████████████████████▊             | 5141/8000 [1:18:25<41:52,  1.14it/s]

Episode 5141/8000, real env return = -106.31


 64%|███████████████████████▊             | 5151/8000 [1:18:34<41:03,  1.16it/s]

Episode 5151/8000, real env return = -106.56


 65%|███████████████████████▊             | 5161/8000 [1:18:43<41:41,  1.14it/s]

Episode 5161/8000, real env return = -106.30


 65%|███████████████████████▉             | 5171/8000 [1:18:51<41:13,  1.14it/s]

Episode 5171/8000, real env return = -106.88


 65%|███████████████████████▉             | 5181/8000 [1:19:00<41:27,  1.13it/s]

Episode 5181/8000, real env return = -107.07


 65%|████████████████████████             | 5191/8000 [1:19:09<40:38,  1.15it/s]

Episode 5191/8000, real env return = -107.05


 65%|████████████████████████             | 5201/8000 [1:19:18<41:30,  1.12it/s]

Episode 5201/8000, real env return = -106.75


 65%|████████████████████████             | 5211/8000 [1:19:26<41:00,  1.13it/s]

Episode 5211/8000, real env return = -106.89


 65%|████████████████████████▏            | 5221/8000 [1:19:35<40:45,  1.14it/s]

Episode 5221/8000, real env return = -106.97


 65%|████████████████████████▏            | 5231/8000 [1:19:44<40:46,  1.13it/s]

Episode 5231/8000, real env return = -106.63


 66%|████████████████████████▏            | 5241/8000 [1:19:53<39:52,  1.15it/s]

Episode 5241/8000, real env return = -106.87


 66%|████████████████████████▎            | 5251/8000 [1:20:02<39:57,  1.15it/s]

Episode 5251/8000, real env return = -106.71


 66%|████████████████████████▎            | 5261/8000 [1:20:10<39:55,  1.14it/s]

Episode 5261/8000, real env return = -106.77


 66%|████████████████████████▍            | 5271/8000 [1:20:19<39:24,  1.15it/s]

Episode 5271/8000, real env return = -106.75


 66%|████████████████████████▍            | 5281/8000 [1:20:28<39:18,  1.15it/s]

Episode 5281/8000, real env return = -107.08


 66%|████████████████████████▍            | 5291/8000 [1:20:36<37:30,  1.20it/s]

Episode 5291/8000, real env return = -106.61


 66%|████████████████████████▌            | 5301/8000 [1:20:45<39:46,  1.13it/s]

Episode 5301/8000, real env return = -106.83


 66%|████████████████████████▌            | 5311/8000 [1:20:54<39:05,  1.15it/s]

Episode 5311/8000, real env return = -106.84


 67%|████████████████████████▌            | 5321/8000 [1:21:02<38:38,  1.16it/s]

Episode 5321/8000, real env return = -106.81


 67%|████████████████████████▋            | 5331/8000 [1:21:11<38:36,  1.15it/s]

Episode 5331/8000, real env return = -107.25


 67%|████████████████████████▋            | 5341/8000 [1:21:20<38:09,  1.16it/s]

Episode 5341/8000, real env return = -106.82


 67%|████████████████████████▋            | 5351/8000 [1:21:28<37:38,  1.17it/s]

Episode 5351/8000, real env return = -106.66


 67%|████████████████████████▊            | 5361/8000 [1:21:37<36:53,  1.19it/s]

Episode 5361/8000, real env return = -106.38


 67%|████████████████████████▊            | 5371/8000 [1:21:46<36:59,  1.18it/s]

Episode 5371/8000, real env return = -105.85


 67%|████████████████████████▉            | 5381/8000 [1:21:55<37:37,  1.16it/s]

Episode 5381/8000, real env return = -106.83


 67%|████████████████████████▉            | 5391/8000 [1:22:03<37:37,  1.16it/s]

Episode 5391/8000, real env return = -105.54


 68%|████████████████████████▉            | 5401/8000 [1:22:12<37:51,  1.14it/s]

Episode 5401/8000, real env return = -106.70


 68%|█████████████████████████            | 5411/8000 [1:22:21<37:17,  1.16it/s]

Episode 5411/8000, real env return = -106.52


 68%|█████████████████████████            | 5421/8000 [1:22:29<37:53,  1.13it/s]

Episode 5421/8000, real env return = -106.72


 68%|█████████████████████████            | 5431/8000 [1:22:38<37:17,  1.15it/s]

Episode 5431/8000, real env return = -106.69


 68%|█████████████████████████▏           | 5441/8000 [1:22:47<37:16,  1.14it/s]

Episode 5441/8000, real env return = -106.58


 68%|█████████████████████████▏           | 5451/8000 [1:22:56<37:24,  1.14it/s]

Episode 5451/8000, real env return = -106.71


 68%|█████████████████████████▎           | 5461/8000 [1:23:04<36:29,  1.16it/s]

Episode 5461/8000, real env return = -106.44


 68%|█████████████████████████▎           | 5471/8000 [1:23:13<37:05,  1.14it/s]

Episode 5471/8000, real env return = -105.74


 69%|█████████████████████████▎           | 5481/8000 [1:23:22<36:56,  1.14it/s]

Episode 5481/8000, real env return = -107.63


 69%|█████████████████████████▍           | 5491/8000 [1:23:31<36:29,  1.15it/s]

Episode 5491/8000, real env return = -107.22


 69%|█████████████████████████▍           | 5501/8000 [1:23:39<36:30,  1.14it/s]

Episode 5501/8000, real env return = -107.34


 69%|█████████████████████████▍           | 5511/8000 [1:23:48<35:58,  1.15it/s]

Episode 5511/8000, real env return = -106.47


 69%|█████████████████████████▌           | 5521/8000 [1:23:57<36:01,  1.15it/s]

Episode 5521/8000, real env return = -106.72


 69%|█████████████████████████▌           | 5531/8000 [1:24:06<36:26,  1.13it/s]

Episode 5531/8000, real env return = -106.66


 69%|█████████████████████████▋           | 5541/8000 [1:24:14<35:39,  1.15it/s]

Episode 5541/8000, real env return = -106.40


 69%|█████████████████████████▋           | 5551/8000 [1:24:23<35:48,  1.14it/s]

Episode 5551/8000, real env return = -106.83


 70%|█████████████████████████▋           | 5561/8000 [1:24:32<35:32,  1.14it/s]

Episode 5561/8000, real env return = -106.75


 70%|█████████████████████████▊           | 5571/8000 [1:24:40<34:51,  1.16it/s]

Episode 5571/8000, real env return = -106.89


 70%|█████████████████████████▊           | 5581/8000 [1:24:49<35:05,  1.15it/s]

Episode 5581/8000, real env return = -106.79


 70%|█████████████████████████▊           | 5591/8000 [1:24:58<35:22,  1.13it/s]

Episode 5591/8000, real env return = -107.12


 70%|█████████████████████████▉           | 5601/8000 [1:25:07<35:11,  1.14it/s]

Episode 5601/8000, real env return = -106.65


 70%|█████████████████████████▉           | 5611/8000 [1:25:15<35:08,  1.13it/s]

Episode 5611/8000, real env return = -106.38


 70%|█████████████████████████▉           | 5621/8000 [1:25:24<34:37,  1.15it/s]

Episode 5621/8000, real env return = -106.44


 70%|██████████████████████████           | 5631/8000 [1:25:33<33:08,  1.19it/s]

Episode 5631/8000, real env return = -106.44


 71%|██████████████████████████           | 5641/8000 [1:25:41<34:26,  1.14it/s]

Episode 5641/8000, real env return = -106.11


 71%|██████████████████████████▏          | 5651/8000 [1:25:50<34:38,  1.13it/s]

Episode 5651/8000, real env return = -105.96


 71%|██████████████████████████▏          | 5661/8000 [1:25:59<34:05,  1.14it/s]

Episode 5661/8000, real env return = -106.19


 71%|██████████████████████████▏          | 5671/8000 [1:26:08<34:34,  1.12it/s]

Episode 5671/8000, real env return = -107.06


 71%|██████████████████████████▎          | 5681/8000 [1:26:16<33:49,  1.14it/s]

Episode 5681/8000, real env return = -106.85


 71%|██████████████████████████▎          | 5691/8000 [1:26:25<33:56,  1.13it/s]

Episode 5691/8000, real env return = -106.01


 71%|██████████████████████████▎          | 5701/8000 [1:26:34<33:39,  1.14it/s]

Episode 5701/8000, real env return = -105.70


 71%|██████████████████████████▍          | 5711/8000 [1:26:42<33:25,  1.14it/s]

Episode 5711/8000, real env return = -107.05


 72%|██████████████████████████▍          | 5721/8000 [1:26:51<32:37,  1.16it/s]

Episode 5721/8000, real env return = -106.76


 72%|██████████████████████████▌          | 5731/8000 [1:27:00<33:04,  1.14it/s]

Episode 5731/8000, real env return = -106.82


 72%|██████████████████████████▌          | 5741/8000 [1:27:09<32:57,  1.14it/s]

Episode 5741/8000, real env return = -106.66


 72%|██████████████████████████▌          | 5751/8000 [1:27:17<32:51,  1.14it/s]

Episode 5751/8000, real env return = -106.99


 72%|██████████████████████████▋          | 5761/8000 [1:27:26<31:24,  1.19it/s]

Episode 5761/8000, real env return = -107.04


 72%|██████████████████████████▋          | 5771/8000 [1:27:35<31:53,  1.16it/s]

Episode 5771/8000, real env return = -106.70


 72%|██████████████████████████▋          | 5781/8000 [1:27:44<32:24,  1.14it/s]

Episode 5781/8000, real env return = -107.28


 72%|██████████████████████████▊          | 5791/8000 [1:27:52<31:31,  1.17it/s]

Episode 5791/8000, real env return = -106.98


 73%|██████████████████████████▊          | 5801/8000 [1:28:01<31:43,  1.16it/s]

Episode 5801/8000, real env return = -106.83


 73%|██████████████████████████▉          | 5811/8000 [1:28:10<31:54,  1.14it/s]

Episode 5811/8000, real env return = -106.49


 73%|██████████████████████████▉          | 5821/8000 [1:28:19<31:07,  1.17it/s]

Episode 5821/8000, real env return = -106.51


 73%|██████████████████████████▉          | 5831/8000 [1:28:27<31:27,  1.15it/s]

Episode 5831/8000, real env return = -106.59


 73%|███████████████████████████          | 5841/8000 [1:28:36<31:50,  1.13it/s]

Episode 5841/8000, real env return = -106.16


 73%|███████████████████████████          | 5851/8000 [1:28:45<31:13,  1.15it/s]

Episode 5851/8000, real env return = -106.62


 73%|███████████████████████████          | 5861/8000 [1:28:54<31:34,  1.13it/s]

Episode 5861/8000, real env return = -106.03


 73%|███████████████████████████▏         | 5871/8000 [1:29:02<30:46,  1.15it/s]

Episode 5871/8000, real env return = -106.24


 74%|███████████████████████████▏         | 5881/8000 [1:29:11<30:50,  1.15it/s]

Episode 5881/8000, real env return = -106.68


 74%|███████████████████████████▏         | 5891/8000 [1:29:20<30:37,  1.15it/s]

Episode 5891/8000, real env return = -107.29


 74%|███████████████████████████▎         | 5901/8000 [1:29:28<30:39,  1.14it/s]

Episode 5901/8000, real env return = -106.90


 74%|███████████████████████████▎         | 5911/8000 [1:29:37<30:22,  1.15it/s]

Episode 5911/8000, real env return = -106.58


 74%|███████████████████████████▍         | 5921/8000 [1:29:46<30:13,  1.15it/s]

Episode 5921/8000, real env return = -106.45


 74%|███████████████████████████▍         | 5931/8000 [1:29:55<30:18,  1.14it/s]

Episode 5931/8000, real env return = -106.24


 74%|███████████████████████████▍         | 5941/8000 [1:30:03<30:11,  1.14it/s]

Episode 5941/8000, real env return = -106.63


 74%|███████████████████████████▌         | 5951/8000 [1:30:12<29:54,  1.14it/s]

Episode 5951/8000, real env return = -107.10


 75%|███████████████████████████▌         | 5961/8000 [1:30:21<29:34,  1.15it/s]

Episode 5961/8000, real env return = -106.90


 75%|███████████████████████████▌         | 5971/8000 [1:30:29<29:38,  1.14it/s]

Episode 5971/8000, real env return = -107.46


 75%|███████████████████████████▋         | 5981/8000 [1:30:38<28:50,  1.17it/s]

Episode 5981/8000, real env return = -106.81


 75%|███████████████████████████▋         | 5991/8000 [1:30:47<29:20,  1.14it/s]

Episode 5991/8000, real env return = -106.29


 75%|███████████████████████████▊         | 6001/8000 [1:30:56<29:08,  1.14it/s]

Episode 6001/8000, real env return = -107.08


 75%|███████████████████████████▊         | 6011/8000 [1:31:04<29:54,  1.11it/s]

Episode 6011/8000, real env return = -106.47


 75%|███████████████████████████▊         | 6021/8000 [1:31:13<29:02,  1.14it/s]

Episode 6021/8000, real env return = -106.21


 75%|███████████████████████████▉         | 6031/8000 [1:31:22<28:08,  1.17it/s]

Episode 6031/8000, real env return = -107.61


 76%|███████████████████████████▉         | 6041/8000 [1:31:30<28:07,  1.16it/s]

Episode 6041/8000, real env return = -107.05


 76%|███████████████████████████▉         | 6051/8000 [1:31:39<28:00,  1.16it/s]

Episode 6051/8000, real env return = -106.20


 76%|████████████████████████████         | 6061/8000 [1:31:48<28:02,  1.15it/s]

Episode 6061/8000, real env return = -106.47


 76%|████████████████████████████         | 6071/8000 [1:31:57<28:08,  1.14it/s]

Episode 6071/8000, real env return = -106.20


 76%|████████████████████████████         | 6081/8000 [1:32:05<27:54,  1.15it/s]

Episode 6081/8000, real env return = -106.33


 76%|████████████████████████████▏        | 6091/8000 [1:32:14<27:32,  1.16it/s]

Episode 6091/8000, real env return = -106.59


 76%|████████████████████████████▏        | 6101/8000 [1:32:23<27:29,  1.15it/s]

Episode 6101/8000, real env return = -107.33


 76%|████████████████████████████▎        | 6111/8000 [1:32:32<27:34,  1.14it/s]

Episode 6111/8000, real env return = -106.22


 77%|████████████████████████████▎        | 6121/8000 [1:32:40<27:17,  1.15it/s]

Episode 6121/8000, real env return = -107.04


 77%|████████████████████████████▎        | 6131/8000 [1:32:49<26:37,  1.17it/s]

Episode 6131/8000, real env return = -106.40


 77%|████████████████████████████▍        | 6141/8000 [1:32:58<26:33,  1.17it/s]

Episode 6141/8000, real env return = -106.54


 77%|████████████████████████████▍        | 6151/8000 [1:33:06<26:56,  1.14it/s]

Episode 6151/8000, real env return = -104.00


 77%|████████████████████████████▍        | 6161/8000 [1:33:15<27:07,  1.13it/s]

Episode 6161/8000, real env return = -107.04


 77%|████████████████████████████▌        | 6171/8000 [1:33:24<26:22,  1.16it/s]

Episode 6171/8000, real env return = -107.03


 77%|████████████████████████████▌        | 6181/8000 [1:33:32<26:42,  1.13it/s]

Episode 6181/8000, real env return = -106.92


 77%|████████████████████████████▋        | 6191/8000 [1:33:41<26:51,  1.12it/s]

Episode 6191/8000, real env return = -106.93


 78%|████████████████████████████▋        | 6201/8000 [1:33:50<25:51,  1.16it/s]

Episode 6201/8000, real env return = -106.66


 78%|████████████████████████████▋        | 6211/8000 [1:33:59<26:10,  1.14it/s]

Episode 6211/8000, real env return = -107.08


 78%|████████████████████████████▊        | 6221/8000 [1:34:07<25:57,  1.14it/s]

Episode 6221/8000, real env return = -106.59


 78%|████████████████████████████▊        | 6231/8000 [1:34:16<25:53,  1.14it/s]

Episode 6231/8000, real env return = -106.10


 78%|████████████████████████████▊        | 6241/8000 [1:34:25<25:24,  1.15it/s]

Episode 6241/8000, real env return = -106.93


 78%|████████████████████████████▉        | 6251/8000 [1:34:34<25:37,  1.14it/s]

Episode 6251/8000, real env return = -106.50


 78%|████████████████████████████▉        | 6261/8000 [1:34:42<25:12,  1.15it/s]

Episode 6261/8000, real env return = -107.33


 78%|█████████████████████████████        | 6271/8000 [1:34:51<24:57,  1.15it/s]

Episode 6271/8000, real env return = -106.79


 79%|█████████████████████████████        | 6281/8000 [1:35:00<25:11,  1.14it/s]

Episode 6281/8000, real env return = -106.62


 79%|█████████████████████████████        | 6291/8000 [1:35:09<24:42,  1.15it/s]

Episode 6291/8000, real env return = -106.44


 79%|█████████████████████████████▏       | 6301/8000 [1:35:17<24:50,  1.14it/s]

Episode 6301/8000, real env return = -106.58


 79%|█████████████████████████████▏       | 6311/8000 [1:35:26<24:55,  1.13it/s]

Episode 6311/8000, real env return = -106.97


 79%|█████████████████████████████▏       | 6321/8000 [1:35:35<24:17,  1.15it/s]

Episode 6321/8000, real env return = -106.97


 79%|█████████████████████████████▎       | 6331/8000 [1:35:43<23:51,  1.17it/s]

Episode 6331/8000, real env return = -106.49


 79%|█████████████████████████████▎       | 6341/8000 [1:35:52<24:56,  1.11it/s]

Episode 6341/8000, real env return = -106.87


 79%|█████████████████████████████▎       | 6351/8000 [1:36:01<23:49,  1.15it/s]

Episode 6351/8000, real env return = -106.98


 80%|█████████████████████████████▍       | 6361/8000 [1:36:10<23:39,  1.15it/s]

Episode 6361/8000, real env return = -107.15


 80%|█████████████████████████████▍       | 6371/8000 [1:36:18<23:39,  1.15it/s]

Episode 6371/8000, real env return = -106.59


 80%|█████████████████████████████▌       | 6381/8000 [1:36:27<23:25,  1.15it/s]

Episode 6381/8000, real env return = -107.15


 80%|█████████████████████████████▌       | 6391/8000 [1:36:36<22:58,  1.17it/s]

Episode 6391/8000, real env return = -107.39


 80%|█████████████████████████████▌       | 6401/8000 [1:36:44<23:12,  1.15it/s]

Episode 6401/8000, real env return = -106.63


 80%|█████████████████████████████▋       | 6411/8000 [1:36:53<22:49,  1.16it/s]

Episode 6411/8000, real env return = -106.65


 80%|█████████████████████████████▋       | 6421/8000 [1:37:02<23:21,  1.13it/s]

Episode 6421/8000, real env return = -106.96


 80%|█████████████████████████████▋       | 6431/8000 [1:37:11<22:47,  1.15it/s]

Episode 6431/8000, real env return = -107.12


 81%|█████████████████████████████▊       | 6441/8000 [1:37:19<22:45,  1.14it/s]

Episode 6441/8000, real env return = -107.76


 81%|█████████████████████████████▊       | 6451/8000 [1:37:28<22:36,  1.14it/s]

Episode 6451/8000, real env return = -107.17


 81%|█████████████████████████████▉       | 6461/8000 [1:37:37<21:56,  1.17it/s]

Episode 6461/8000, real env return = -106.99


 81%|█████████████████████████████▉       | 6471/8000 [1:37:46<22:25,  1.14it/s]

Episode 6471/8000, real env return = -107.03


 81%|█████████████████████████████▉       | 6481/8000 [1:37:54<21:39,  1.17it/s]

Episode 6481/8000, real env return = -106.99


 81%|██████████████████████████████       | 6491/8000 [1:38:03<21:39,  1.16it/s]

Episode 6491/8000, real env return = -107.14


 81%|██████████████████████████████       | 6501/8000 [1:38:12<21:42,  1.15it/s]

Episode 6501/8000, real env return = -106.81


 81%|██████████████████████████████       | 6511/8000 [1:38:20<21:47,  1.14it/s]

Episode 6511/8000, real env return = -106.27


 82%|██████████████████████████████▏      | 6521/8000 [1:38:29<21:28,  1.15it/s]

Episode 6521/8000, real env return = -107.71


 82%|██████████████████████████████▏      | 6531/8000 [1:38:38<21:29,  1.14it/s]

Episode 6531/8000, real env return = -106.95


 82%|██████████████████████████████▎      | 6541/8000 [1:38:47<21:19,  1.14it/s]

Episode 6541/8000, real env return = -107.31


 82%|██████████████████████████████▎      | 6551/8000 [1:38:55<21:06,  1.14it/s]

Episode 6551/8000, real env return = -107.20


 82%|██████████████████████████████▎      | 6561/8000 [1:39:04<20:45,  1.15it/s]

Episode 6561/8000, real env return = -106.97


 82%|██████████████████████████████▍      | 6571/8000 [1:39:13<20:14,  1.18it/s]

Episode 6571/8000, real env return = -107.19


 82%|██████████████████████████████▍      | 6581/8000 [1:39:22<20:33,  1.15it/s]

Episode 6581/8000, real env return = -106.60


 82%|██████████████████████████████▍      | 6591/8000 [1:39:30<20:31,  1.14it/s]

Episode 6591/8000, real env return = -107.55


 83%|██████████████████████████████▌      | 6601/8000 [1:39:39<19:54,  1.17it/s]

Episode 6601/8000, real env return = -107.17


 83%|██████████████████████████████▌      | 6611/8000 [1:39:48<20:15,  1.14it/s]

Episode 6611/8000, real env return = -106.82


 83%|██████████████████████████████▌      | 6621/8000 [1:39:56<20:06,  1.14it/s]

Episode 6621/8000, real env return = -106.74


 83%|██████████████████████████████▋      | 6631/8000 [1:40:05<19:33,  1.17it/s]

Episode 6631/8000, real env return = -107.37


 83%|██████████████████████████████▋      | 6641/8000 [1:40:14<19:48,  1.14it/s]

Episode 6641/8000, real env return = -106.90


 83%|██████████████████████████████▊      | 6651/8000 [1:40:23<19:35,  1.15it/s]

Episode 6651/8000, real env return = -106.93


 83%|██████████████████████████████▊      | 6661/8000 [1:40:31<19:32,  1.14it/s]

Episode 6661/8000, real env return = -106.88


 83%|██████████████████████████████▊      | 6671/8000 [1:40:40<19:15,  1.15it/s]

Episode 6671/8000, real env return = -107.49


 84%|██████████████████████████████▉      | 6681/8000 [1:40:49<19:28,  1.13it/s]

Episode 6681/8000, real env return = -107.24


 84%|██████████████████████████████▉      | 6691/8000 [1:40:58<19:17,  1.13it/s]

Episode 6691/8000, real env return = -106.03


 84%|██████████████████████████████▉      | 6701/8000 [1:41:06<18:44,  1.16it/s]

Episode 6701/8000, real env return = -106.73


 84%|███████████████████████████████      | 6711/8000 [1:41:15<18:52,  1.14it/s]

Episode 6711/8000, real env return = -107.00


 84%|███████████████████████████████      | 6721/8000 [1:41:24<18:25,  1.16it/s]

Episode 6721/8000, real env return = -106.87


 84%|███████████████████████████████▏     | 6731/8000 [1:41:32<18:32,  1.14it/s]

Episode 6731/8000, real env return = -106.30


 84%|███████████████████████████████▏     | 6741/8000 [1:41:41<18:33,  1.13it/s]

Episode 6741/8000, real env return = -106.42


 84%|███████████████████████████████▏     | 6751/8000 [1:41:50<18:04,  1.15it/s]

Episode 6751/8000, real env return = -106.59


 85%|███████████████████████████████▎     | 6761/8000 [1:41:58<17:22,  1.19it/s]

Episode 6761/8000, real env return = -106.80


 85%|███████████████████████████████▎     | 6771/8000 [1:42:07<17:53,  1.15it/s]

Episode 6771/8000, real env return = -105.92


 85%|███████████████████████████████▎     | 6781/8000 [1:42:16<17:18,  1.17it/s]

Episode 6781/8000, real env return = -106.81


 85%|███████████████████████████████▍     | 6791/8000 [1:42:25<17:38,  1.14it/s]

Episode 6791/8000, real env return = -106.85


 85%|███████████████████████████████▍     | 6801/8000 [1:42:33<17:24,  1.15it/s]

Episode 6801/8000, real env return = -106.10


 85%|███████████████████████████████▌     | 6811/8000 [1:42:42<17:22,  1.14it/s]

Episode 6811/8000, real env return = -105.94


 85%|███████████████████████████████▌     | 6821/8000 [1:42:51<17:15,  1.14it/s]

Episode 6821/8000, real env return = -106.35


 85%|███████████████████████████████▌     | 6831/8000 [1:42:59<17:10,  1.13it/s]

Episode 6831/8000, real env return = -106.10


 86%|███████████████████████████████▋     | 6841/8000 [1:43:08<16:29,  1.17it/s]

Episode 6841/8000, real env return = -105.87


 86%|███████████████████████████████▋     | 6851/8000 [1:43:17<16:42,  1.15it/s]

Episode 6851/8000, real env return = -106.84


 86%|███████████████████████████████▋     | 6861/8000 [1:43:26<16:34,  1.15it/s]

Episode 6861/8000, real env return = -106.92


 86%|███████████████████████████████▊     | 6871/8000 [1:43:34<16:13,  1.16it/s]

Episode 6871/8000, real env return = -106.26


 86%|███████████████████████████████▊     | 6881/8000 [1:43:43<16:06,  1.16it/s]

Episode 6881/8000, real env return = -106.75


 86%|███████████████████████████████▊     | 6891/8000 [1:43:52<15:59,  1.16it/s]

Episode 6891/8000, real env return = -106.47


 86%|███████████████████████████████▉     | 6901/8000 [1:44:00<16:07,  1.14it/s]

Episode 6901/8000, real env return = -106.37


 86%|███████████████████████████████▉     | 6911/8000 [1:44:09<15:51,  1.14it/s]

Episode 6911/8000, real env return = -105.96


 87%|████████████████████████████████     | 6921/8000 [1:44:18<15:52,  1.13it/s]

Episode 6921/8000, real env return = -106.65


 87%|████████████████████████████████     | 6931/8000 [1:44:27<15:44,  1.13it/s]

Episode 6931/8000, real env return = -106.70


 87%|████████████████████████████████     | 6941/8000 [1:44:35<15:24,  1.15it/s]

Episode 6941/8000, real env return = -106.58


 87%|████████████████████████████████▏    | 6951/8000 [1:44:44<15:12,  1.15it/s]

Episode 6951/8000, real env return = -107.33


 87%|████████████████████████████████▏    | 6961/8000 [1:44:53<15:12,  1.14it/s]

Episode 6961/8000, real env return = -106.69


 87%|████████████████████████████████▏    | 6971/8000 [1:45:02<15:08,  1.13it/s]

Episode 6971/8000, real env return = -105.99


 87%|████████████████████████████████▎    | 6981/8000 [1:45:11<14:55,  1.14it/s]

Episode 6981/8000, real env return = -106.39


 87%|████████████████████████████████▎    | 6991/8000 [1:45:19<15:08,  1.11it/s]

Episode 6991/8000, real env return = -109.32


 88%|████████████████████████████████▍    | 7001/8000 [1:45:28<14:36,  1.14it/s]

Episode 7001/8000, real env return = -106.70


 88%|████████████████████████████████▍    | 7011/8000 [1:45:37<14:28,  1.14it/s]

Episode 7011/8000, real env return = -107.22


 88%|████████████████████████████████▍    | 7021/8000 [1:45:46<14:23,  1.13it/s]

Episode 7021/8000, real env return = -107.64


 88%|████████████████████████████████▌    | 7031/8000 [1:45:54<14:12,  1.14it/s]

Episode 7031/8000, real env return = -107.09


 88%|████████████████████████████████▌    | 7041/8000 [1:46:03<13:51,  1.15it/s]

Episode 7041/8000, real env return = -106.61


 88%|████████████████████████████████▌    | 7051/8000 [1:46:12<13:50,  1.14it/s]

Episode 7051/8000, real env return = -107.11


 88%|████████████████████████████████▋    | 7061/8000 [1:46:20<13:21,  1.17it/s]

Episode 7061/8000, real env return = -107.07


 88%|████████████████████████████████▋    | 7071/8000 [1:46:29<13:29,  1.15it/s]

Episode 7071/8000, real env return = -106.06


 89%|████████████████████████████████▋    | 7081/8000 [1:46:38<13:15,  1.16it/s]

Episode 7081/8000, real env return = -106.62


 89%|████████████████████████████████▊    | 7091/8000 [1:46:46<12:56,  1.17it/s]

Episode 7091/8000, real env return = -106.31


 89%|████████████████████████████████▊    | 7101/8000 [1:46:54<12:36,  1.19it/s]

Episode 7101/8000, real env return = -106.74


 89%|████████████████████████████████▉    | 7111/8000 [1:47:03<12:07,  1.22it/s]

Episode 7111/8000, real env return = -106.53


 89%|████████████████████████████████▉    | 7121/8000 [1:47:11<12:26,  1.18it/s]

Episode 7121/8000, real env return = -106.13


 89%|████████████████████████████████▉    | 7131/8000 [1:47:20<12:29,  1.16it/s]

Episode 7131/8000, real env return = -106.47


 89%|█████████████████████████████████    | 7141/8000 [1:47:28<11:48,  1.21it/s]

Episode 7141/8000, real env return = -106.47


 89%|█████████████████████████████████    | 7151/8000 [1:47:37<12:35,  1.12it/s]

Episode 7151/8000, real env return = -105.06


 90%|█████████████████████████████████    | 7161/8000 [1:47:45<11:43,  1.19it/s]

Episode 7161/8000, real env return = -101.89


 90%|█████████████████████████████████▏   | 7171/8000 [1:47:54<11:49,  1.17it/s]

Episode 7171/8000, real env return = -103.31


 90%|█████████████████████████████████▏   | 7181/8000 [1:48:02<11:25,  1.19it/s]

Episode 7181/8000, real env return = -102.49


 90%|█████████████████████████████████▎   | 7191/8000 [1:48:11<11:55,  1.13it/s]

Episode 7191/8000, real env return = -102.77


 90%|█████████████████████████████████▎   | 7201/8000 [1:48:20<11:37,  1.15it/s]

Episode 7201/8000, real env return = -102.83


 90%|█████████████████████████████████▎   | 7211/8000 [1:48:28<11:44,  1.12it/s]

Episode 7211/8000, real env return = -102.37


 90%|█████████████████████████████████▍   | 7221/8000 [1:48:37<11:21,  1.14it/s]

Episode 7221/8000, real env return = -102.06


 90%|█████████████████████████████████▍   | 7231/8000 [1:48:46<11:12,  1.14it/s]

Episode 7231/8000, real env return = -103.37


 91%|█████████████████████████████████▍   | 7241/8000 [1:48:55<10:54,  1.16it/s]

Episode 7241/8000, real env return = -101.60


 91%|█████████████████████████████████▌   | 7251/8000 [1:49:04<11:07,  1.12it/s]

Episode 7251/8000, real env return = -100.56


 91%|█████████████████████████████████▌   | 7261/8000 [1:49:13<10:49,  1.14it/s]

Episode 7261/8000, real env return = -100.41


 91%|█████████████████████████████████▋   | 7271/8000 [1:49:22<10:44,  1.13it/s]

Episode 7271/8000, real env return = -100.88


 91%|█████████████████████████████████▋   | 7281/8000 [1:49:31<10:39,  1.12it/s]

Episode 7281/8000, real env return = -101.46


 91%|█████████████████████████████████▋   | 7291/8000 [1:49:40<10:30,  1.12it/s]

Episode 7291/8000, real env return = -102.28


 91%|█████████████████████████████████▊   | 7301/8000 [1:49:48<10:14,  1.14it/s]

Episode 7301/8000, real env return = -101.44


 91%|█████████████████████████████████▊   | 7311/8000 [1:49:57<10:09,  1.13it/s]

Episode 7311/8000, real env return = -101.94


 92%|█████████████████████████████████▊   | 7321/8000 [1:50:06<09:59,  1.13it/s]

Episode 7321/8000, real env return = -101.25


 92%|█████████████████████████████████▉   | 7331/8000 [1:50:15<09:49,  1.14it/s]

Episode 7331/8000, real env return = -102.26


 92%|█████████████████████████████████▉   | 7341/8000 [1:50:24<09:47,  1.12it/s]

Episode 7341/8000, real env return = -101.75


 92%|█████████████████████████████████▉   | 7351/8000 [1:50:33<09:11,  1.18it/s]

Episode 7351/8000, real env return = -101.90


 92%|██████████████████████████████████   | 7361/8000 [1:50:42<09:15,  1.15it/s]

Episode 7361/8000, real env return = -101.85


 92%|██████████████████████████████████   | 7371/8000 [1:50:50<09:17,  1.13it/s]

Episode 7371/8000, real env return = -100.88


 92%|██████████████████████████████████▏  | 7381/8000 [1:50:59<09:14,  1.12it/s]

Episode 7381/8000, real env return = -101.00


 92%|██████████████████████████████████▏  | 7391/8000 [1:51:08<08:58,  1.13it/s]

Episode 7391/8000, real env return = -101.34


 93%|██████████████████████████████████▏  | 7401/8000 [1:51:17<08:49,  1.13it/s]

Episode 7401/8000, real env return = -100.81


 93%|██████████████████████████████████▎  | 7411/8000 [1:51:26<08:38,  1.14it/s]

Episode 7411/8000, real env return = -101.11


 93%|██████████████████████████████████▎  | 7421/8000 [1:51:35<08:36,  1.12it/s]

Episode 7421/8000, real env return = -100.87


 93%|██████████████████████████████████▎  | 7431/8000 [1:51:43<08:26,  1.12it/s]

Episode 7431/8000, real env return = -101.37


 93%|██████████████████████████████████▍  | 7441/8000 [1:51:52<08:13,  1.13it/s]

Episode 7441/8000, real env return = -101.42


 93%|██████████████████████████████████▍  | 7451/8000 [1:52:01<08:13,  1.11it/s]

Episode 7451/8000, real env return = -100.78


 93%|██████████████████████████████████▌  | 7461/8000 [1:52:10<07:56,  1.13it/s]

Episode 7461/8000, real env return = -101.83


 93%|██████████████████████████████████▌  | 7471/8000 [1:52:19<07:43,  1.14it/s]

Episode 7471/8000, real env return = -100.85


 94%|██████████████████████████████████▌  | 7481/8000 [1:52:28<07:42,  1.12it/s]

Episode 7481/8000, real env return = -101.42


 94%|██████████████████████████████████▋  | 7491/8000 [1:52:37<07:21,  1.15it/s]

Episode 7491/8000, real env return = -100.64


 94%|██████████████████████████████████▋  | 7501/8000 [1:52:46<07:21,  1.13it/s]

Episode 7501/8000, real env return = -101.33


 94%|██████████████████████████████████▋  | 7511/8000 [1:52:54<07:08,  1.14it/s]

Episode 7511/8000, real env return = -101.19


 94%|██████████████████████████████████▊  | 7521/8000 [1:53:03<07:11,  1.11it/s]

Episode 7521/8000, real env return = -100.61


 94%|██████████████████████████████████▊  | 7531/8000 [1:53:12<06:56,  1.13it/s]

Episode 7531/8000, real env return = -101.40


 94%|██████████████████████████████████▉  | 7541/8000 [1:53:21<06:46,  1.13it/s]

Episode 7541/8000, real env return = -101.21


 94%|██████████████████████████████████▉  | 7551/8000 [1:53:30<06:42,  1.12it/s]

Episode 7551/8000, real env return = -101.33


 95%|██████████████████████████████████▉  | 7561/8000 [1:53:39<06:30,  1.12it/s]

Episode 7561/8000, real env return = -100.97


 95%|███████████████████████████████████  | 7571/8000 [1:53:48<06:21,  1.13it/s]

Episode 7571/8000, real env return = -100.97


 95%|███████████████████████████████████  | 7581/8000 [1:53:57<06:13,  1.12it/s]

Episode 7581/8000, real env return = -101.25


 95%|███████████████████████████████████  | 7591/8000 [1:54:05<05:56,  1.15it/s]

Episode 7591/8000, real env return = -101.36


 95%|███████████████████████████████████▏ | 7601/8000 [1:54:14<05:47,  1.15it/s]

Episode 7601/8000, real env return = -101.47


 95%|███████████████████████████████████▏ | 7611/8000 [1:54:23<05:48,  1.12it/s]

Episode 7611/8000, real env return = -101.31


 95%|███████████████████████████████████▏ | 7621/8000 [1:54:32<05:33,  1.14it/s]

Episode 7621/8000, real env return = -101.85


 95%|███████████████████████████████████▎ | 7631/8000 [1:54:41<05:23,  1.14it/s]

Episode 7631/8000, real env return = -101.85


 96%|███████████████████████████████████▎ | 7641/8000 [1:54:49<04:59,  1.20it/s]

Episode 7641/8000, real env return = -101.83


 96%|███████████████████████████████████▍ | 7651/8000 [1:54:58<04:57,  1.17it/s]

Episode 7651/8000, real env return = -102.27


 96%|███████████████████████████████████▍ | 7661/8000 [1:55:06<04:53,  1.16it/s]

Episode 7661/8000, real env return = -102.07


 96%|███████████████████████████████████▍ | 7671/8000 [1:55:15<04:52,  1.12it/s]

Episode 7671/8000, real env return = -100.15


 96%|███████████████████████████████████▌ | 7681/8000 [1:55:24<04:41,  1.13it/s]

Episode 7681/8000, real env return = -101.59


 96%|███████████████████████████████████▌ | 7691/8000 [1:55:33<04:35,  1.12it/s]

Episode 7691/8000, real env return = -100.04


 96%|███████████████████████████████████▌ | 7701/8000 [1:55:42<04:24,  1.13it/s]

Episode 7701/8000, real env return = -102.00


 96%|███████████████████████████████████▋ | 7711/8000 [1:55:51<04:18,  1.12it/s]

Episode 7711/8000, real env return = -102.36


 97%|███████████████████████████████████▋ | 7721/8000 [1:55:59<04:03,  1.14it/s]

Episode 7721/8000, real env return = -101.45


 97%|███████████████████████████████████▊ | 7731/8000 [1:56:08<03:57,  1.13it/s]

Episode 7731/8000, real env return = -100.81


 97%|███████████████████████████████████▊ | 7741/8000 [1:56:17<03:48,  1.13it/s]

Episode 7741/8000, real env return = -100.14


 97%|███████████████████████████████████▊ | 7751/8000 [1:56:26<03:34,  1.16it/s]

Episode 7751/8000, real env return = -100.77


 97%|███████████████████████████████████▉ | 7761/8000 [1:56:34<03:21,  1.18it/s]

Episode 7761/8000, real env return = -101.00


 97%|███████████████████████████████████▉ | 7771/8000 [1:56:43<03:19,  1.15it/s]

Episode 7771/8000, real env return = -100.72


 97%|███████████████████████████████████▉ | 7781/8000 [1:56:52<03:19,  1.10it/s]

Episode 7781/8000, real env return = -100.80


 97%|████████████████████████████████████ | 7791/8000 [1:57:00<02:54,  1.20it/s]

Episode 7791/8000, real env return = -101.07


 98%|████████████████████████████████████ | 7801/8000 [1:57:09<02:56,  1.13it/s]

Episode 7801/8000, real env return = -101.53


 98%|████████████████████████████████████▏| 7811/8000 [1:57:17<02:40,  1.18it/s]

Episode 7811/8000, real env return = -101.79


 98%|████████████████████████████████████▏| 7821/8000 [1:57:26<02:30,  1.19it/s]

Episode 7821/8000, real env return = -101.78


 98%|████████████████████████████████████▏| 7831/8000 [1:57:34<02:22,  1.19it/s]

Episode 7831/8000, real env return = -101.90


 98%|████████████████████████████████████▎| 7841/8000 [1:57:43<02:15,  1.17it/s]

Episode 7841/8000, real env return = -101.89


 98%|████████████████████████████████████▎| 7851/8000 [1:57:51<02:00,  1.23it/s]

Episode 7851/8000, real env return = -101.61


 98%|████████████████████████████████████▎| 7861/8000 [1:58:00<02:00,  1.16it/s]

Episode 7861/8000, real env return = -101.07


 98%|████████████████████████████████████▍| 7871/8000 [1:58:09<01:54,  1.13it/s]

Episode 7871/8000, real env return = -101.74


 99%|████████████████████████████████████▍| 7881/8000 [1:58:17<01:41,  1.17it/s]

Episode 7881/8000, real env return = -101.55


 99%|████████████████████████████████████▍| 7891/8000 [1:58:26<01:36,  1.13it/s]

Episode 7891/8000, real env return = -101.95


 99%|████████████████████████████████████▌| 7901/8000 [1:58:34<01:26,  1.14it/s]

Episode 7901/8000, real env return = -101.55


 99%|████████████████████████████████████▌| 7911/8000 [1:58:43<01:18,  1.13it/s]

Episode 7911/8000, real env return = -102.45


 99%|████████████████████████████████████▋| 7921/8000 [1:58:52<01:07,  1.17it/s]

Episode 7921/8000, real env return = -102.33


 99%|████████████████████████████████████▋| 7931/8000 [1:59:00<00:57,  1.20it/s]

Episode 7931/8000, real env return = -101.63


 99%|████████████████████████████████████▋| 7941/8000 [1:59:08<00:50,  1.17it/s]

Episode 7941/8000, real env return = -101.31


 99%|████████████████████████████████████▊| 7951/8000 [1:59:17<00:40,  1.20it/s]

Episode 7951/8000, real env return = -102.08


100%|████████████████████████████████████▊| 7961/8000 [1:59:25<00:32,  1.22it/s]

Episode 7961/8000, real env return = -100.83


100%|████████████████████████████████████▊| 7971/8000 [1:59:34<00:23,  1.22it/s]

Episode 7971/8000, real env return = -101.33


100%|████████████████████████████████████▉| 7981/8000 [1:59:42<00:16,  1.16it/s]

Episode 7981/8000, real env return = -99.70


100%|████████████████████████████████████▉| 7991/8000 [1:59:51<00:07,  1.18it/s]

Episode 7991/8000, real env return = -100.00


100%|█████████████████████████████████████| 8000/8000 [1:59:59<00:00,  1.11it/s]

Training finished.



