In [3]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.logger import configure

# ========= 1. Environment Wrapper =========
class EpisodicRewardWrapper(gym.RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.cumulative_reward = 0.0

    def step(self, action):
        result = self.env.step(action)
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            obs, reward, done, info = result

        self.cumulative_reward += reward
        
        if done:
            reward = self.cumulative_reward
            self.cumulative_reward = 0.0
        else:
            reward = 0.0
        
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.cumulative_reward = 0.0
        result = self.env.reset(**kwargs)
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        return obs



class TrajectoryReplay:
    def __init__(self):
        self.trajectories = []

    def add_trajectory(self, traj):
        self.trajectories.append(traj)

    def sample(self, batch_size):
        indices = np.random.randint(0, len(self.trajectories), size=batch_size)
        return [self.trajectories[i] for i in indices]

    def __len__(self):
        return len(self.trajectories)


class RewardModel(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_size=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + action_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
        )
        self.mu_layer = nn.Linear(hidden_size, 1)
        self.log_sigma_layer = nn.Linear(hidden_size, 1)

    def forward(self, s, a):
        x = torch.cat([s, a], dim=-1)
        x = self.net(x)
        mu = self.mu_layer(x)
        log_sigma = self.log_sigma_layer(x)
        sigma = torch.clamp(torch.exp(log_sigma), min=1e-3, max=4.0)
        return mu, sigma


# ========= 4. Train a reward model with batch trajectories ==========
def train_reward_model_gaussian_loo(reward_model, optimizer, trajectories, device='cpu'):
    reward_model.train()
    total_loss = 0.0
    total_episodes = 0

    for traj in trajectories:
        R_ep = sum([t[2] for t in traj])
        Tj = len(traj)
        if Tj == 0:
            continue

        s_list, a_list = [], []
        for (s, a, r, s_next, done) in traj:
            s_list.append(s)
            a_list.append(a)
        states = torch.tensor(s_list, dtype=torch.float, device=device)
        actions = torch.tensor(a_list, dtype=torch.float, device=device)
        
        mu, sigma = reward_model(states, actions)
        
        nll_list = []
        for i in range(Tj):
            mask = torch.ones(Tj, dtype=torch.bool, device=device)
            mask[i] = False
            epsilons = torch.randn((mask.sum(), 1), device=device)
            r_samples = mu[mask] + epsilons * sigma[mask]
            leave_one_sum = r_samples.sum()
            target_i = R_ep - leave_one_sum
            nll_i = torch.log(sigma[i]) + ((target_i - mu[i])**2) / (2 * sigma[i]**2)
            nll_list.append(nll_i)
        
        loss_traj = torch.stack(nll_list).mean()
        total_loss += loss_traj
        total_episodes += 1

    if total_episodes > 0:
        loss_mean = total_loss / total_episodes
    else:
        loss_mean = torch.tensor(0.0, device=device)

    optimizer.zero_grad()
    loss_mean.backward()
    optimizer.step()
    return loss_mean.item()








# ========= 5. How to collect a trajectory ==========
def collect_episodes(env, model, n_episodes, device='cpu'):
    trajectories = []
    for _ in range(n_episodes):
        result = env.reset()
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        done = False
        traj = []
        while not done:
            if obs is None or (hasattr(obs, '__len__') and len(obs) == 0):
                raise ValueError("Observation is empty, check env.reset() output")
            obs_tensor = torch.tensor(obs, dtype=torch.float, device=device).unsqueeze(0)
            with torch.no_grad():
                action, _states = model.predict(obs_tensor.cpu().numpy(), deterministic=False)
            result = env.step(action[0])
            
            if len(result) == 5:
                next_obs, reward, terminated, truncated, info = result
                done = terminated or truncated
            
            else:
                next_obs, reward, done, info = result
            traj.append((obs, action[0], reward, next_obs, done))
            obs = next_obs
        trajectories.append(traj)
    return trajectories

def add_shaped_transitions_to_replay(model, reward_model, trajectories, device='cpu'):
    for traj in trajectories:
        states = []
        actions = []
        next_states = []
        dones = []
        for (s, a, r, s_next, d) in traj:
            states.append(s)
            actions.append(a)
            next_states.append(s_next)
            dones.append(d)
        
        s_tensor = torch.tensor(states, dtype=torch.float, device=device)
        a_tensor = torch.tensor(actions, dtype=torch.float, device=device)
        
        with torch.no_grad():
            mu, _ = reward_model(s_tensor, a_tensor)
        shaped_rewards = mu.cpu().numpy().squeeze(-1)
        
        for i in range(len(traj)):
            s, a, _, s_next, d = traj[i]
            r_shaped = shaped_rewards[i]
            model.replay_buffer.add(
                s, s_next, a, r_shaped, d, infos=[{}],
            )

def main():
    class Args:
        env = "BipedalWalker-v3"
        episodes = 8000
        steps_per_update = 200
        device = "cuda"
    
    args = Args()
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    base_env = gym.make(args.env)
    env = EpisodicRewardWrapper(base_env)
    vec_env = DummyVecEnv([lambda: env])

    policy_kwargs = dict(
        net_arch=dict(pi=[64, 64], qf=[64, 64])
    )
    model = SAC(
        policy="MlpPolicy",
        env=vec_env,
        policy_kwargs=policy_kwargs,
        verbose=1,
        seed=42,
        buffer_size=100000,
        learning_starts=0,
        train_freq=1,
        gradient_steps=0,
        batch_size=512,
        gamma=0.99,
        device=device
    )

    from stable_baselines3.common.logger import configure
    model._logger = configure(folder=None, format_strings=["stdout"])

    state_dim = base_env.observation_space.shape[0]
    action_dim = base_env.action_space.shape[0]
    reward_model = RewardModel(state_dim, action_dim).to(device)
    reward_optimizer = optim.Adam(reward_model.parameters(), lr=3e-4)

    traj_replay = TrajectoryReplay()

    ep_record = []
    for ep in tqdm(range(args.episodes)):
        new_trajectories = collect_episodes(env, model, n_episodes=1, device=device)
        for traj in new_trajectories:
            traj_replay.add_trajectory(traj)

        if len(traj_replay) >= 5:
            batch_size_trajectories = 4
            sampled_trajs = traj_replay.sample(batch_size_trajectories)
            loss_val = train_reward_model_gaussian_loo(
                reward_model, reward_optimizer, sampled_trajs, device=device
            )
        
        add_shaped_transitions_to_replay(model, reward_model, new_trajectories, device=device)
        
        if len(traj_replay) >= 5:
            model.train(args.steps_per_update)
        ep_return = sum([t[2] for t in new_trajectories[-1]])
        ep_record.append(ep_return)
        if ep % 10 == 0:
            print(f"Episode {ep+1}/{args.episodes}, real env return = {ep_return:.2f}")

    print("Training finished.")
    return ep_record

In [4]:
train1 = main()

Using cuda device


  0%|                                          | 2/8000 [00:00<15:39,  8.51it/s]

Episode 1/8000, real env return = -103.75


  0%|                                      | 11/8000 [00:39<13:12:47,  5.95s/it]

Episode 11/8000, real env return = -99.19


  0%|                                      | 21/8000 [01:32<10:30:23,  4.74s/it]

Episode 21/8000, real env return = -105.93


  0%|▏                                      | 31/8000 [01:59<5:55:57,  2.68s/it]

Episode 31/8000, real env return = -106.35


  1%|▏                                      | 41/8000 [02:34<6:49:22,  3.09s/it]

Episode 41/8000, real env return = -122.67


  1%|▏                                      | 51/8000 [03:07<7:17:39,  3.30s/it]

Episode 51/8000, real env return = -103.35


  1%|▎                                      | 61/8000 [03:44<9:12:21,  4.17s/it]

Episode 61/8000, real env return = -109.50


  1%|▎                                     | 71/8000 [04:41<11:36:17,  5.27s/it]

Episode 71/8000, real env return = -103.83


  1%|▍                                     | 81/8000 [05:42<14:05:00,  6.40s/it]

Episode 81/8000, real env return = -120.08


  1%|▍                                     | 91/8000 [06:43<12:20:10,  5.62s/it]

Episode 91/8000, real env return = -116.08


  1%|▍                                    | 101/8000 [07:44<14:40:43,  6.69s/it]

Episode 101/8000, real env return = -18.10


  1%|▌                                    | 111/8000 [08:54<17:27:56,  7.97s/it]

Episode 111/8000, real env return = -168.34


  2%|▌                                    | 121/8000 [10:00<17:23:39,  7.95s/it]

Episode 121/8000, real env return = 72.40


  2%|▌                                    | 131/8000 [11:02<11:22:25,  5.20s/it]

Episode 131/8000, real env return = -78.72


  2%|▋                                    | 141/8000 [12:14<16:33:29,  7.58s/it]

Episode 141/8000, real env return = -193.38


  2%|▋                                    | 151/8000 [13:17<11:58:21,  5.49s/it]

Episode 151/8000, real env return = -188.83


  2%|▋                                    | 161/8000 [14:43<19:32:33,  8.97s/it]

Episode 161/8000, real env return = -126.70


  2%|▊                                    | 171/8000 [16:18<22:40:24, 10.43s/it]

Episode 171/8000, real env return = -136.83


  2%|▊                                    | 181/8000 [17:35<15:40:28,  7.22s/it]

Episode 181/8000, real env return = -251.86


  2%|▉                                    | 191/8000 [18:52<17:57:42,  8.28s/it]

Episode 191/8000, real env return = -141.63


  3%|▉                                    | 201/8000 [20:09<18:35:02,  8.58s/it]

Episode 201/8000, real env return = -59.39


  3%|▉                                    | 211/8000 [21:37<20:23:51,  9.43s/it]

Episode 211/8000, real env return = -180.50


  3%|█                                    | 221/8000 [22:58<17:56:57,  8.31s/it]

Episode 221/8000, real env return = -120.09


  3%|█                                    | 231/8000 [24:35<20:49:25,  9.65s/it]

Episode 231/8000, real env return = -109.15


  3%|█                                    | 241/8000 [25:55<18:50:44,  8.74s/it]

Episode 241/8000, real env return = -106.46


  3%|█▏                                   | 251/8000 [27:16<17:43:59,  8.24s/it]

Episode 251/8000, real env return = -111.62


  3%|█▏                                   | 261/8000 [28:19<13:06:52,  6.10s/it]

Episode 261/8000, real env return = -116.54


  3%|█▎                                   | 271/8000 [29:29<15:22:47,  7.16s/it]

Episode 271/8000, real env return = -125.22


  4%|█▎                                   | 281/8000 [30:43<17:08:01,  7.99s/it]

Episode 281/8000, real env return = -138.93


  4%|█▎                                   | 291/8000 [32:15<21:02:35,  9.83s/it]

Episode 291/8000, real env return = -125.75


  4%|█▍                                   | 301/8000 [33:35<15:33:09,  7.27s/it]

Episode 301/8000, real env return = -130.63


  4%|█▍                                   | 311/8000 [34:58<16:55:09,  7.92s/it]

Episode 311/8000, real env return = -129.17


  4%|█▍                                   | 321/8000 [36:26<19:55:12,  9.34s/it]

Episode 321/8000, real env return = -119.61


  4%|█▌                                   | 331/8000 [37:54<20:37:26,  9.68s/it]

Episode 331/8000, real env return = -132.10


  4%|█▌                                   | 341/8000 [39:27<20:01:48,  9.41s/it]

Episode 341/8000, real env return = -129.50


  4%|█▌                                   | 351/8000 [41:00<19:56:24,  9.38s/it]

Episode 351/8000, real env return = -131.01


  5%|█▋                                   | 361/8000 [42:33<19:25:09,  9.15s/it]

Episode 361/8000, real env return = -134.33


  5%|█▋                                   | 371/8000 [44:14<20:37:29,  9.73s/it]

Episode 371/8000, real env return = -110.92


  5%|█▊                                   | 381/8000 [45:54<22:06:24, 10.45s/it]

Episode 381/8000, real env return = -108.86


  5%|█▊                                   | 391/8000 [47:28<19:18:33,  9.14s/it]

Episode 391/8000, real env return = -116.82


  5%|█▊                                   | 401/8000 [49:15<23:04:31, 10.93s/it]

Episode 401/8000, real env return = -114.46


  5%|█▉                                   | 411/8000 [50:56<21:53:24, 10.38s/it]

Episode 411/8000, real env return = -132.56


  5%|█▉                                   | 421/8000 [52:34<19:58:14,  9.49s/it]

Episode 421/8000, real env return = -123.69


  5%|█▉                                   | 431/8000 [54:12<22:21:11, 10.63s/it]

Episode 431/8000, real env return = -129.58


  6%|██                                   | 441/8000 [55:59<24:48:01, 11.81s/it]

Episode 441/8000, real env return = -125.45


  6%|██                                   | 451/8000 [57:30<18:49:50,  8.98s/it]

Episode 451/8000, real env return = -131.44


  6%|██▏                                  | 461/8000 [59:11<21:05:43, 10.07s/it]

Episode 461/8000, real env return = -114.10


  6%|██                                 | 471/8000 [1:00:59<21:27:40, 10.26s/it]

Episode 471/8000, real env return = -104.84


  6%|██                                 | 481/8000 [1:02:24<16:20:50,  7.83s/it]

Episode 481/8000, real env return = -110.86


  6%|██▏                                | 491/8000 [1:03:51<18:45:54,  9.00s/it]

Episode 491/8000, real env return = -102.28


  6%|██▏                                | 501/8000 [1:05:10<16:41:03,  8.01s/it]

Episode 501/8000, real env return = -103.37


  6%|██▏                                | 511/8000 [1:06:30<17:44:11,  8.53s/it]

Episode 511/8000, real env return = -102.22


  7%|██▎                                | 521/8000 [1:07:54<15:43:51,  7.57s/it]

Episode 521/8000, real env return = -102.59


  7%|██▎                                | 531/8000 [1:09:04<15:51:59,  7.65s/it]

Episode 531/8000, real env return = -101.46


  7%|██▎                                | 541/8000 [1:10:22<14:36:03,  7.05s/it]

Episode 541/8000, real env return = -101.91


  7%|██▍                                | 551/8000 [1:11:42<18:22:41,  8.88s/it]

Episode 551/8000, real env return = -100.62


  7%|██▍                                | 561/8000 [1:12:53<14:41:04,  7.11s/it]

Episode 561/8000, real env return = -102.63


  7%|██▍                                | 571/8000 [1:14:13<17:09:56,  8.32s/it]

Episode 571/8000, real env return = -101.76


  7%|██▌                                | 581/8000 [1:15:31<15:43:39,  7.63s/it]

Episode 581/8000, real env return = -102.49


  7%|██▌                                | 591/8000 [1:16:40<16:19:45,  7.93s/it]

Episode 591/8000, real env return = -101.12


  8%|██▋                                | 601/8000 [1:17:37<11:47:20,  5.74s/it]

Episode 601/8000, real env return = -104.47


  8%|██▋                                | 611/8000 [1:18:55<14:57:24,  7.29s/it]

Episode 611/8000, real env return = -104.15


  8%|██▋                                | 621/8000 [1:19:56<12:57:14,  6.32s/it]

Episode 621/8000, real env return = -103.36


  8%|██▊                                | 631/8000 [1:21:11<14:13:35,  6.95s/it]

Episode 631/8000, real env return = -103.06


  8%|██▊                                | 641/8000 [1:22:09<10:43:01,  5.24s/it]

Episode 641/8000, real env return = -101.01


  8%|██▊                                | 651/8000 [1:23:17<13:14:10,  6.48s/it]

Episode 651/8000, real env return = -102.80


  8%|██▉                                | 661/8000 [1:24:20<11:22:20,  5.58s/it]

Episode 661/8000, real env return = -101.22


  8%|██▉                                | 671/8000 [1:25:24<12:56:37,  6.36s/it]

Episode 671/8000, real env return = -100.65


  9%|██▉                                | 681/8000 [1:26:33<13:30:04,  6.64s/it]

Episode 681/8000, real env return = -102.89


  9%|███                                | 691/8000 [1:27:42<14:19:20,  7.05s/it]

Episode 691/8000, real env return = -102.42


  9%|███                                | 701/8000 [1:28:37<12:50:26,  6.33s/it]

Episode 701/8000, real env return = -101.91


  9%|███                                | 711/8000 [1:29:32<10:14:17,  5.06s/it]

Episode 711/8000, real env return = -103.46


  9%|███▏                                | 721/8000 [1:30:30<9:10:51,  4.54s/it]

Episode 721/8000, real env return = -102.62


  9%|███▏                               | 731/8000 [1:31:36<15:26:46,  7.65s/it]

Episode 731/8000, real env return = -100.33


  9%|███▏                               | 741/8000 [1:32:40<11:23:29,  5.65s/it]

Episode 741/8000, real env return = -101.43


  9%|███▎                               | 751/8000 [1:33:30<11:52:26,  5.90s/it]

Episode 751/8000, real env return = -100.89


 10%|███▎                               | 761/8000 [1:34:26<10:01:18,  4.98s/it]

Episode 761/8000, real env return = -102.65


 10%|███▎                               | 771/8000 [1:35:21<10:18:12,  5.13s/it]

Episode 771/8000, real env return = -103.85


 10%|███▌                                | 781/8000 [1:36:02<7:51:55,  3.92s/it]

Episode 781/8000, real env return = -100.99


 10%|███▌                                | 791/8000 [1:36:40<7:16:23,  3.63s/it]

Episode 791/8000, real env return = -106.44


 10%|███▌                               | 801/8000 [1:37:42<13:46:47,  6.89s/it]

Episode 801/8000, real env return = -102.56


 10%|███▌                               | 811/8000 [1:38:35<10:40:30,  5.35s/it]

Episode 811/8000, real env return = -100.76


 10%|███▋                                | 821/8000 [1:39:34<9:43:54,  4.88s/it]

Episode 821/8000, real env return = -102.57


 10%|███▋                                | 831/8000 [1:40:20<7:49:41,  3.93s/it]

Episode 831/8000, real env return = -101.82


 11%|███▊                                | 841/8000 [1:41:06<9:49:33,  4.94s/it]

Episode 841/8000, real env return = -101.13


 11%|███▋                               | 851/8000 [1:41:56<11:23:04,  5.73s/it]

Episode 851/8000, real env return = -102.59


 11%|███▊                               | 861/8000 [1:42:52<11:12:35,  5.65s/it]

Episode 861/8000, real env return = -101.09


 11%|███▉                                | 871/8000 [1:43:42<8:45:00,  4.42s/it]

Episode 871/8000, real env return = -102.79


 11%|███▊                               | 881/8000 [1:44:41<10:18:32,  5.21s/it]

Episode 881/8000, real env return = -102.41


 11%|███▉                               | 891/8000 [1:45:31<11:31:39,  5.84s/it]

Episode 891/8000, real env return = -102.01


 11%|███▉                               | 901/8000 [1:46:37<11:20:38,  5.75s/it]

Episode 901/8000, real env return = -103.30


 11%|████                                | 911/8000 [1:47:28<6:38:52,  3.38s/it]

Episode 911/8000, real env return = -101.25


 12%|████                               | 921/8000 [1:48:10<11:15:28,  5.73s/it]

Episode 921/8000, real env return = -101.81


 12%|████▏                               | 931/8000 [1:48:59<9:27:22,  4.82s/it]

Episode 931/8000, real env return = -101.40


 12%|████▏                               | 941/8000 [1:49:50<8:49:16,  4.50s/it]

Episode 941/8000, real env return = -100.96


 12%|████▎                               | 951/8000 [1:50:39<9:19:37,  4.76s/it]

Episode 951/8000, real env return = -100.88


 12%|████▎                               | 961/8000 [1:51:26<8:17:43,  4.24s/it]

Episode 961/8000, real env return = -101.79


 12%|████▏                              | 971/8000 [1:52:20<10:30:38,  5.38s/it]

Episode 971/8000, real env return = -103.93


 12%|████▍                               | 981/8000 [1:53:07<8:52:23,  4.55s/it]

Episode 981/8000, real env return = -101.54


 12%|████▍                               | 991/8000 [1:53:53<6:53:29,  3.54s/it]

Episode 991/8000, real env return = -101.46


 13%|████▍                              | 1001/8000 [1:54:39<7:51:53,  4.05s/it]

Episode 1001/8000, real env return = -102.78


 13%|████▎                             | 1011/8000 [1:55:24<10:01:46,  5.17s/it]

Episode 1011/8000, real env return = -100.83


 13%|████▎                             | 1021/8000 [1:56:09<10:13:44,  5.28s/it]

Episode 1021/8000, real env return = -104.12


 13%|████▌                              | 1031/8000 [1:56:51<6:59:09,  3.61s/it]

Episode 1031/8000, real env return = -103.89


 13%|████▌                              | 1041/8000 [1:57:33<8:02:44,  4.16s/it]

Episode 1041/8000, real env return = -106.55


 13%|████▌                              | 1051/8000 [1:58:09<7:36:47,  3.94s/it]

Episode 1051/8000, real env return = -108.61


 13%|████▋                              | 1061/8000 [1:58:53<7:29:13,  3.88s/it]

Episode 1061/8000, real env return = -102.87


 13%|████▌                             | 1071/8000 [1:59:49<10:31:51,  5.47s/it]

Episode 1071/8000, real env return = -101.39


 14%|████▋                              | 1081/8000 [2:00:30<7:16:54,  3.79s/it]

Episode 1081/8000, real env return = -103.43


 14%|████▋                             | 1091/8000 [2:01:18<10:06:27,  5.27s/it]

Episode 1091/8000, real env return = -102.57


 14%|████▋                             | 1101/8000 [2:02:15<11:13:49,  5.86s/it]

Episode 1101/8000, real env return = -103.70


 14%|████▋                             | 1111/8000 [2:03:09<10:49:14,  5.65s/it]

Episode 1111/8000, real env return = -102.20


 14%|████▉                              | 1121/8000 [2:03:48<7:35:29,  3.97s/it]

Episode 1121/8000, real env return = -100.74


 14%|████▊                             | 1131/8000 [2:04:40<10:48:28,  5.66s/it]

Episode 1131/8000, real env return = -100.56


 14%|████▊                             | 1141/8000 [2:05:31<12:16:30,  6.44s/it]

Episode 1141/8000, real env return = -147.06


 14%|████▉                             | 1151/8000 [2:06:40<13:56:58,  7.33s/it]

Episode 1151/8000, real env return = -137.59


 15%|█████                              | 1161/8000 [2:07:29<6:52:08,  3.62s/it]

Episode 1161/8000, real env return = -105.12


 15%|█████                              | 1171/8000 [2:08:14<8:00:00,  4.22s/it]

Episode 1171/8000, real env return = -109.48


 15%|█████▏                             | 1181/8000 [2:08:44<6:19:32,  3.34s/it]

Episode 1181/8000, real env return = -103.25


 15%|█████▏                             | 1191/8000 [2:09:17<9:07:18,  4.82s/it]

Episode 1191/8000, real env return = -101.68


 15%|█████                             | 1201/8000 [2:10:19<13:59:38,  7.41s/it]

Episode 1201/8000, real env return = -134.26


 15%|█████▏                            | 1211/8000 [2:11:24<13:20:43,  7.08s/it]

Episode 1211/8000, real env return = -137.64


 15%|█████▏                            | 1221/8000 [2:12:26<11:54:44,  6.33s/it]

Episode 1221/8000, real env return = -148.71


 15%|█████▏                            | 1231/8000 [2:13:25<10:27:39,  5.56s/it]

Episode 1231/8000, real env return = -132.38


 16%|█████▍                             | 1241/8000 [2:14:26<9:50:33,  5.24s/it]

Episode 1241/8000, real env return = -134.64


 16%|█████▎                            | 1251/8000 [2:15:44<15:22:10,  8.20s/it]

Episode 1251/8000, real env return = -107.96


 16%|█████▌                             | 1261/8000 [2:16:33<9:38:41,  5.15s/it]

Episode 1261/8000, real env return = -96.71


 16%|█████▍                            | 1271/8000 [2:17:40<14:28:51,  7.75s/it]

Episode 1271/8000, real env return = -114.13


 16%|█████▌                             | 1281/8000 [2:18:35<9:43:42,  5.21s/it]

Episode 1281/8000, real env return = -111.91


 16%|█████▍                            | 1291/8000 [2:19:33<10:48:20,  5.80s/it]

Episode 1291/8000, real env return = -111.46


 16%|█████▌                            | 1301/8000 [2:20:48<14:36:11,  7.85s/it]

Episode 1301/8000, real env return = -101.19


 16%|█████▌                            | 1311/8000 [2:22:01<14:14:34,  7.67s/it]

Episode 1311/8000, real env return = -113.50


 17%|█████▌                            | 1321/8000 [2:23:02<11:09:26,  6.01s/it]

Episode 1321/8000, real env return = -126.59


 17%|█████▋                            | 1331/8000 [2:24:15<11:53:43,  6.42s/it]

Episode 1331/8000, real env return = -108.03


 17%|█████▋                            | 1341/8000 [2:25:32<12:42:46,  6.87s/it]

Episode 1341/8000, real env return = -109.85


 17%|█████▋                            | 1351/8000 [2:26:43<13:20:36,  7.22s/it]

Episode 1351/8000, real env return = -95.38


 17%|█████▊                            | 1361/8000 [2:27:57<12:54:53,  7.00s/it]

Episode 1361/8000, real env return = -115.58


 17%|█████▊                            | 1371/8000 [2:29:04<14:16:36,  7.75s/it]

Episode 1371/8000, real env return = -128.13


 17%|█████▊                            | 1381/8000 [2:30:13<11:38:48,  6.33s/it]

Episode 1381/8000, real env return = -127.56


 17%|█████▉                            | 1391/8000 [2:31:30<12:04:15,  6.58s/it]

Episode 1391/8000, real env return = -100.42


 18%|█████▉                            | 1401/8000 [2:32:37<14:00:48,  7.64s/it]

Episode 1401/8000, real env return = -121.83


 18%|█████▉                            | 1411/8000 [2:33:53<13:38:40,  7.45s/it]

Episode 1411/8000, real env return = -77.55


 18%|██████                            | 1421/8000 [2:34:58<13:04:06,  7.15s/it]

Episode 1421/8000, real env return = -104.69


 18%|██████                            | 1431/8000 [2:36:07<10:23:24,  5.69s/it]

Episode 1431/8000, real env return = -111.98


 18%|██████                            | 1441/8000 [2:37:29<13:44:38,  7.54s/it]

Episode 1441/8000, real env return = -121.03


 18%|██████▏                           | 1451/8000 [2:38:49<12:52:16,  7.08s/it]

Episode 1451/8000, real env return = -112.65


 18%|██████▍                            | 1461/8000 [2:39:33<8:13:56,  4.53s/it]

Episode 1461/8000, real env return = -117.76


 18%|██████▍                            | 1471/8000 [2:40:23<8:53:19,  4.90s/it]

Episode 1471/8000, real env return = -119.61


 19%|██████▎                           | 1481/8000 [2:41:15<10:51:17,  5.99s/it]

Episode 1481/8000, real env return = -94.31


 19%|██████▌                            | 1491/8000 [2:42:01<8:24:30,  4.65s/it]

Episode 1491/8000, real env return = -84.63


 19%|██████▌                            | 1501/8000 [2:42:51<9:31:23,  5.28s/it]

Episode 1501/8000, real env return = -74.70


 19%|██████▌                            | 1511/8000 [2:43:45<8:27:59,  4.70s/it]

Episode 1511/8000, real env return = -88.85


 19%|██████▋                            | 1521/8000 [2:44:35<8:29:08,  4.71s/it]

Episode 1521/8000, real env return = -80.73


 19%|██████▋                            | 1531/8000 [2:45:26<9:42:06,  5.40s/it]

Episode 1531/8000, real env return = -88.84


 19%|██████▋                            | 1541/8000 [2:46:11<8:42:26,  4.85s/it]

Episode 1541/8000, real env return = -68.88


 19%|██████▊                            | 1551/8000 [2:46:55<7:44:23,  4.32s/it]

Episode 1551/8000, real env return = -76.16


 20%|██████▊                            | 1561/8000 [2:47:47<9:26:44,  5.28s/it]

Episode 1561/8000, real env return = -77.18


 20%|██████▋                           | 1571/8000 [2:48:45<11:18:10,  6.33s/it]

Episode 1571/8000, real env return = -83.94


 20%|██████▋                           | 1581/8000 [2:49:41<10:43:34,  6.02s/it]

Episode 1581/8000, real env return = -81.32


 20%|██████▉                            | 1591/8000 [2:50:33<9:06:14,  5.11s/it]

Episode 1591/8000, real env return = -81.38


 20%|██████▊                           | 1601/8000 [2:51:29<10:19:50,  5.81s/it]

Episode 1601/8000, real env return = -81.99


 20%|███████                            | 1611/8000 [2:52:22<9:40:13,  5.45s/it]

Episode 1611/8000, real env return = -78.86


 20%|███████                            | 1621/8000 [2:53:17<9:36:26,  5.42s/it]

Episode 1621/8000, real env return = -72.69


 20%|███████▏                           | 1631/8000 [2:54:10<7:58:10,  4.50s/it]

Episode 1631/8000, real env return = -88.12


 21%|███████▏                           | 1641/8000 [2:55:09<9:38:10,  5.46s/it]

Episode 1641/8000, real env return = -72.33


 21%|███████                           | 1651/8000 [2:56:08<11:20:43,  6.43s/it]

Episode 1651/8000, real env return = -72.43


 21%|███████                           | 1661/8000 [2:56:56<10:08:56,  5.76s/it]

Episode 1661/8000, real env return = -58.23


 21%|███████▎                           | 1671/8000 [2:57:48<8:16:34,  4.71s/it]

Episode 1671/8000, real env return = -99.42


 21%|███████▎                           | 1681/8000 [2:58:40<9:47:23,  5.58s/it]

Episode 1681/8000, real env return = -43.00


 21%|███████▏                          | 1691/8000 [2:59:33<10:26:17,  5.96s/it]

Episode 1691/8000, real env return = -57.25


 21%|███████▍                           | 1701/8000 [3:00:24<8:47:55,  5.03s/it]

Episode 1701/8000, real env return = -66.01


 21%|███████▍                           | 1711/8000 [3:01:18<7:52:57,  4.51s/it]

Episode 1711/8000, real env return = -68.38


 22%|███████▌                           | 1721/8000 [3:02:10<9:41:35,  5.56s/it]

Episode 1721/8000, real env return = -61.18


 22%|███████▌                           | 1731/8000 [3:03:01<8:48:06,  5.05s/it]

Episode 1731/8000, real env return = -76.98


 22%|███████▌                           | 1741/8000 [3:03:46<7:34:11,  4.35s/it]

Episode 1741/8000, real env return = -82.50


 22%|███████▍                          | 1751/8000 [3:04:43<10:40:58,  6.15s/it]

Episode 1751/8000, real env return = -80.62


 22%|███████▋                           | 1761/8000 [3:05:31<8:44:37,  5.05s/it]

Episode 1761/8000, real env return = -85.77


 22%|███████▋                           | 1771/8000 [3:06:20<7:55:25,  4.58s/it]

Episode 1771/8000, real env return = -67.19


 22%|███████▊                           | 1781/8000 [3:07:11<8:30:01,  4.92s/it]

Episode 1781/8000, real env return = -79.56


 22%|███████▊                           | 1791/8000 [3:08:10<9:16:37,  5.38s/it]

Episode 1791/8000, real env return = -75.56


 23%|███████▉                           | 1801/8000 [3:08:56<7:24:48,  4.31s/it]

Episode 1801/8000, real env return = -84.97


 23%|███████▉                           | 1811/8000 [3:09:50<9:15:59,  5.39s/it]

Episode 1811/8000, real env return = -82.51


 23%|███████▋                          | 1821/8000 [3:10:50<10:28:13,  6.10s/it]

Episode 1821/8000, real env return = -73.92


 23%|████████                           | 1831/8000 [3:11:41<8:53:34,  5.19s/it]

Episode 1831/8000, real env return = -70.47


 23%|███████▊                          | 1841/8000 [3:12:40<11:55:07,  6.97s/it]

Episode 1841/8000, real env return = -76.36


 23%|███████▊                          | 1851/8000 [3:13:37<10:50:33,  6.35s/it]

Episode 1851/8000, real env return = -74.99


 23%|███████▉                          | 1861/8000 [3:14:41<10:43:51,  6.29s/it]

Episode 1861/8000, real env return = -67.52


 23%|████████▏                          | 1871/8000 [3:15:36<9:41:48,  5.70s/it]

Episode 1871/8000, real env return = -73.93


 24%|████████▏                          | 1881/8000 [3:16:29<9:08:36,  5.38s/it]

Episode 1881/8000, real env return = -71.99


 24%|████████▎                          | 1891/8000 [3:17:28<9:25:56,  5.56s/it]

Episode 1891/8000, real env return = -79.97


 24%|████████▎                          | 1901/8000 [3:18:20<9:16:48,  5.48s/it]

Episode 1901/8000, real env return = -80.27


 24%|████████▎                          | 1911/8000 [3:19:17<9:21:56,  5.54s/it]

Episode 1911/8000, real env return = -69.81


 24%|████████▏                         | 1921/8000 [3:20:18<10:01:51,  5.94s/it]

Episode 1921/8000, real env return = -69.85


 24%|████████▏                         | 1931/8000 [3:21:15<10:27:25,  6.20s/it]

Episode 1931/8000, real env return = -69.59


 24%|████████▍                          | 1941/8000 [3:22:16<9:29:08,  5.64s/it]

Episode 1941/8000, real env return = -72.57


 24%|████████▌                          | 1951/8000 [3:23:06<8:35:30,  5.11s/it]

Episode 1951/8000, real env return = -82.33


 25%|████████▌                          | 1961/8000 [3:23:59<9:03:54,  5.40s/it]

Episode 1961/8000, real env return = -57.31


 25%|████████▌                          | 1971/8000 [3:24:56<9:27:49,  5.65s/it]

Episode 1971/8000, real env return = -72.75


 25%|████████▋                          | 1981/8000 [3:25:44<8:44:08,  5.22s/it]

Episode 1981/8000, real env return = -60.57


 25%|████████▋                          | 1991/8000 [3:26:32<8:29:20,  5.09s/it]

Episode 1991/8000, real env return = -54.85


 25%|████████▊                          | 2001/8000 [3:27:26<9:43:46,  5.84s/it]

Episode 2001/8000, real env return = -88.72


 25%|████████▌                         | 2011/8000 [3:28:31<10:02:31,  6.04s/it]

Episode 2011/8000, real env return = -83.35


 25%|████████▌                         | 2021/8000 [3:29:24<10:37:02,  6.39s/it]

Episode 2021/8000, real env return = -79.58


 25%|████████▉                          | 2031/8000 [3:30:22<9:29:15,  5.72s/it]

Episode 2031/8000, real env return = -75.19


 26%|████████▋                         | 2041/8000 [3:31:15<10:27:12,  6.32s/it]

Episode 2041/8000, real env return = -74.83


 26%|████████▋                         | 2051/8000 [3:32:18<11:13:35,  6.79s/it]

Episode 2051/8000, real env return = -93.83


 26%|█████████                          | 2061/8000 [3:33:13<9:05:49,  5.51s/it]

Episode 2061/8000, real env return = -107.40


 26%|█████████                          | 2071/8000 [3:34:07<8:05:28,  4.91s/it]

Episode 2071/8000, real env return = -110.02


 26%|█████████                          | 2081/8000 [3:34:56<8:35:33,  5.23s/it]

Episode 2081/8000, real env return = -40.45


 26%|████████▉                         | 2091/8000 [3:35:57<10:10:47,  6.20s/it]

Episode 2091/8000, real env return = -66.26


 26%|█████████▏                         | 2101/8000 [3:36:51<9:11:27,  5.61s/it]

Episode 2101/8000, real env return = -70.87


 26%|█████████▏                         | 2111/8000 [3:37:36<6:24:06,  3.91s/it]

Episode 2111/8000, real env return = -108.42


 27%|█████████▎                         | 2121/8000 [3:38:22<7:35:15,  4.65s/it]

Episode 2121/8000, real env return = -80.05


 27%|█████████▎                         | 2131/8000 [3:39:17<8:53:24,  5.45s/it]

Episode 2131/8000, real env return = -72.56


 27%|█████████▎                         | 2141/8000 [3:40:07<7:25:30,  4.56s/it]

Episode 2141/8000, real env return = -65.43


 27%|█████████▍                         | 2151/8000 [3:40:58<8:15:37,  5.08s/it]

Episode 2151/8000, real env return = -55.04


 27%|█████████▍                         | 2161/8000 [3:41:48<7:20:08,  4.52s/it]

Episode 2161/8000, real env return = -57.65


 27%|█████████▍                         | 2171/8000 [3:42:44<9:09:02,  5.65s/it]

Episode 2171/8000, real env return = -54.01


 27%|█████████▌                         | 2181/8000 [3:43:38<9:42:29,  6.01s/it]

Episode 2181/8000, real env return = -46.48


 27%|█████████▌                         | 2191/8000 [3:44:24<6:56:53,  4.31s/it]

Episode 2191/8000, real env return = -107.45


 28%|█████████▋                         | 2201/8000 [3:45:22<9:05:37,  5.65s/it]

Episode 2201/8000, real env return = -32.22


 28%|█████████▋                         | 2211/8000 [3:46:11<7:39:12,  4.76s/it]

Episode 2211/8000, real env return = -64.43


 28%|█████████▋                         | 2221/8000 [3:46:59<8:16:52,  5.16s/it]

Episode 2221/8000, real env return = -65.19


 28%|█████████▊                         | 2231/8000 [3:47:46<7:53:04,  4.92s/it]

Episode 2231/8000, real env return = -108.41


 28%|█████████▊                         | 2241/8000 [3:48:35<8:31:12,  5.33s/it]

Episode 2241/8000, real env return = -113.44


 28%|█████████▊                         | 2251/8000 [3:49:31<8:45:14,  5.48s/it]

Episode 2251/8000, real env return = 30.32


 28%|█████████▉                         | 2261/8000 [3:50:33<9:32:34,  5.99s/it]

Episode 2261/8000, real env return = -113.33


 28%|█████████▉                         | 2271/8000 [3:51:05<4:04:36,  2.56s/it]

Episode 2271/8000, real env return = -112.41


 29%|█████████▉                         | 2281/8000 [3:51:24<3:34:19,  2.25s/it]

Episode 2281/8000, real env return = -23.88


 29%|██████████                         | 2291/8000 [3:51:49<3:41:28,  2.33s/it]

Episode 2291/8000, real env return = -16.53


 29%|██████████                         | 2301/8000 [3:52:13<3:54:09,  2.47s/it]

Episode 2301/8000, real env return = -44.51


 29%|██████████                         | 2311/8000 [3:52:34<2:59:28,  1.89s/it]

Episode 2311/8000, real env return = -107.01


 29%|██████████▏                        | 2321/8000 [3:52:56<4:04:02,  2.58s/it]

Episode 2321/8000, real env return = 24.02


 29%|██████████▏                        | 2331/8000 [3:53:16<3:12:09,  2.03s/it]

Episode 2331/8000, real env return = 108.62


 29%|██████████▏                        | 2341/8000 [3:53:42<3:46:07,  2.40s/it]

Episode 2341/8000, real env return = -107.20


 29%|██████████▎                        | 2351/8000 [3:54:06<3:54:06,  2.49s/it]

Episode 2351/8000, real env return = 68.14


 30%|██████████▎                        | 2361/8000 [3:54:28<3:25:31,  2.19s/it]

Episode 2361/8000, real env return = 76.09


 30%|██████████▎                        | 2371/8000 [3:54:55<4:01:47,  2.58s/it]

Episode 2371/8000, real env return = 61.59


 30%|██████████▍                        | 2381/8000 [3:55:22<4:07:03,  2.64s/it]

Episode 2381/8000, real env return = 38.25


 30%|██████████▍                        | 2391/8000 [3:55:43<3:25:33,  2.20s/it]

Episode 2391/8000, real env return = 59.05


 30%|██████████▌                        | 2401/8000 [3:56:07<3:49:44,  2.46s/it]

Episode 2401/8000, real env return = 52.49


 30%|██████████▌                        | 2411/8000 [3:56:31<3:44:29,  2.41s/it]

Episode 2411/8000, real env return = 46.29


 30%|██████████▌                        | 2421/8000 [3:56:55<3:43:46,  2.41s/it]

Episode 2421/8000, real env return = -113.04


 30%|██████████▋                        | 2431/8000 [3:57:15<3:12:15,  2.07s/it]

Episode 2431/8000, real env return = -107.57


 31%|██████████▋                        | 2441/8000 [3:57:37<3:20:56,  2.17s/it]

Episode 2441/8000, real env return = 62.46


 31%|██████████▋                        | 2451/8000 [3:58:01<3:40:02,  2.38s/it]

Episode 2451/8000, real env return = 54.80


 31%|██████████▊                        | 2461/8000 [3:58:26<4:19:38,  2.81s/it]

Episode 2461/8000, real env return = 46.80


 31%|██████████▊                        | 2471/8000 [3:58:46<3:18:32,  2.15s/it]

Episode 2471/8000, real env return = 30.03


 31%|██████████▊                        | 2481/8000 [3:59:08<3:04:21,  2.00s/it]

Episode 2481/8000, real env return = -114.06


 31%|██████████▉                        | 2491/8000 [3:59:34<4:20:23,  2.84s/it]

Episode 2491/8000, real env return = 43.25


 31%|██████████▉                        | 2501/8000 [3:59:58<3:27:00,  2.26s/it]

Episode 2501/8000, real env return = 32.73


 31%|██████████▉                        | 2511/8000 [4:00:22<3:45:09,  2.46s/it]

Episode 2511/8000, real env return = 48.36


 32%|███████████                        | 2521/8000 [4:00:44<3:03:07,  2.01s/it]

Episode 2521/8000, real env return = 48.47


 32%|███████████                        | 2531/8000 [4:01:08<4:05:26,  2.69s/it]

Episode 2531/8000, real env return = 48.36


 32%|███████████                        | 2541/8000 [4:01:30<3:31:24,  2.32s/it]

Episode 2541/8000, real env return = 58.59


 32%|███████████▏                       | 2551/8000 [4:01:52<3:02:33,  2.01s/it]

Episode 2551/8000, real env return = -120.98


 32%|███████████▏                       | 2561/8000 [4:02:12<2:16:19,  1.50s/it]

Episode 2561/8000, real env return = -119.75


 32%|███████████▏                       | 2571/8000 [4:02:37<3:49:15,  2.53s/it]

Episode 2571/8000, real env return = 65.05


 32%|███████████▎                       | 2581/8000 [4:03:01<3:25:11,  2.27s/it]

Episode 2581/8000, real env return = 57.73


 32%|███████████▎                       | 2591/8000 [4:03:26<3:36:28,  2.40s/it]

Episode 2591/8000, real env return = 81.49


 33%|███████████▍                       | 2601/8000 [4:03:48<3:25:51,  2.29s/it]

Episode 2601/8000, real env return = 93.83


 33%|███████████▍                       | 2611/8000 [4:04:14<3:43:36,  2.49s/it]

Episode 2611/8000, real env return = 82.86


 33%|███████████▍                       | 2621/8000 [4:04:40<3:28:32,  2.33s/it]

Episode 2621/8000, real env return = 52.69


 33%|███████████▌                       | 2631/8000 [4:05:02<3:27:10,  2.32s/it]

Episode 2631/8000, real env return = 82.88


 33%|███████████▌                       | 2641/8000 [4:05:22<2:37:55,  1.77s/it]

Episode 2641/8000, real env return = -113.98


 33%|███████████▌                       | 2651/8000 [4:05:44<3:16:17,  2.20s/it]

Episode 2651/8000, real env return = 71.15


 33%|███████████▋                       | 2661/8000 [4:06:09<3:48:21,  2.57s/it]

Episode 2661/8000, real env return = 78.66


 33%|███████████▋                       | 2671/8000 [4:06:32<3:08:13,  2.12s/it]

Episode 2671/8000, real env return = -119.95


 34%|███████████▋                       | 2681/8000 [4:06:54<3:16:12,  2.21s/it]

Episode 2681/8000, real env return = -124.55


 34%|███████████▊                       | 2691/8000 [4:07:19<3:26:40,  2.34s/it]

Episode 2691/8000, real env return = 146.11


 34%|███████████▊                       | 2701/8000 [4:07:43<3:12:26,  2.18s/it]

Episode 2701/8000, real env return = 141.86


 34%|███████████▊                       | 2711/8000 [4:08:07<3:34:20,  2.43s/it]

Episode 2711/8000, real env return = 125.65


 34%|███████████▉                       | 2721/8000 [4:08:29<3:31:48,  2.41s/it]

Episode 2721/8000, real env return = 141.99


 34%|███████████▉                       | 2731/8000 [4:08:55<3:40:54,  2.52s/it]

Episode 2731/8000, real env return = 113.29


 34%|███████████▉                       | 2741/8000 [4:09:20<3:38:37,  2.49s/it]

Episode 2741/8000, real env return = 114.33


 34%|████████████                       | 2751/8000 [4:09:44<3:37:09,  2.48s/it]

Episode 2751/8000, real env return = 71.44


 35%|████████████                       | 2761/8000 [4:10:08<3:29:37,  2.40s/it]

Episode 2761/8000, real env return = 68.09


 35%|████████████                       | 2771/8000 [4:10:32<3:30:38,  2.42s/it]

Episode 2771/8000, real env return = 100.44


 35%|████████████▏                      | 2781/8000 [4:10:53<3:15:37,  2.25s/it]

Episode 2781/8000, real env return = 121.23


 35%|████████████▏                      | 2791/8000 [4:11:16<3:47:21,  2.62s/it]

Episode 2791/8000, real env return = 117.99


 35%|████████████▎                      | 2801/8000 [4:11:39<2:49:38,  1.96s/it]

Episode 2801/8000, real env return = -118.36


 35%|████████████▎                      | 2811/8000 [4:12:05<3:46:40,  2.62s/it]

Episode 2811/8000, real env return = 122.20


 35%|████████████▎                      | 2821/8000 [4:12:28<3:40:58,  2.56s/it]

Episode 2821/8000, real env return = 115.84


 35%|████████████▍                      | 2831/8000 [4:12:51<3:17:47,  2.30s/it]

Episode 2831/8000, real env return = 130.13


 36%|████████████▍                      | 2841/8000 [4:13:15<3:25:44,  2.39s/it]

Episode 2841/8000, real env return = 130.66


 36%|████████████▍                      | 2851/8000 [4:13:40<3:19:20,  2.32s/it]

Episode 2851/8000, real env return = 151.53


 36%|████████████▌                      | 2861/8000 [4:14:04<3:35:10,  2.51s/it]

Episode 2861/8000, real env return = 133.67


 36%|████████████▌                      | 2871/8000 [4:14:28<3:17:58,  2.32s/it]

Episode 2871/8000, real env return = 129.05


 36%|████████████▌                      | 2881/8000 [4:14:52<3:20:19,  2.35s/it]

Episode 2881/8000, real env return = 102.12


 36%|████████████▋                      | 2891/8000 [4:15:13<3:17:21,  2.32s/it]

Episode 2891/8000, real env return = 145.77


 36%|████████████▋                      | 2901/8000 [4:15:38<3:16:44,  2.32s/it]

Episode 2901/8000, real env return = 140.62


 36%|████████████▋                      | 2911/8000 [4:16:01<2:59:19,  2.11s/it]

Episode 2911/8000, real env return = 27.36


 37%|████████████▊                      | 2921/8000 [4:16:25<3:21:28,  2.38s/it]

Episode 2921/8000, real env return = 126.56


 37%|████████████▊                      | 2931/8000 [4:16:52<4:00:37,  2.85s/it]

Episode 2931/8000, real env return = 151.92


 37%|████████████▊                      | 2941/8000 [4:17:18<3:39:22,  2.60s/it]

Episode 2941/8000, real env return = 170.81


 37%|████████████▉                      | 2951/8000 [4:17:43<3:33:29,  2.54s/it]

Episode 2951/8000, real env return = 139.67


 37%|████████████▉                      | 2961/8000 [4:18:10<3:40:45,  2.63s/it]

Episode 2961/8000, real env return = 160.04


 37%|████████████▉                      | 2971/8000 [4:18:38<4:00:41,  2.87s/it]

Episode 2971/8000, real env return = 217.64


 37%|█████████████                      | 2981/8000 [4:19:01<3:33:57,  2.56s/it]

Episode 2981/8000, real env return = 197.82


 37%|█████████████                      | 2991/8000 [4:19:24<3:25:20,  2.46s/it]

Episode 2991/8000, real env return = 225.22


 38%|█████████████▏                     | 3001/8000 [4:19:50<3:27:53,  2.50s/it]

Episode 3001/8000, real env return = 199.98


 38%|█████████████▏                     | 3011/8000 [4:20:17<3:48:30,  2.75s/it]

Episode 3011/8000, real env return = 211.05


 38%|█████████████▏                     | 3021/8000 [4:20:41<3:13:35,  2.33s/it]

Episode 3021/8000, real env return = 255.57


 38%|█████████████▎                     | 3031/8000 [4:21:05<3:12:37,  2.33s/it]

Episode 3031/8000, real env return = 238.47


 38%|█████████████▎                     | 3041/8000 [4:21:30<3:21:50,  2.44s/it]

Episode 3041/8000, real env return = 199.64


 38%|█████████████▎                     | 3051/8000 [4:21:53<3:11:14,  2.32s/it]

Episode 3051/8000, real env return = 206.02


 38%|█████████████▍                     | 3061/8000 [4:22:17<2:47:39,  2.04s/it]

Episode 3061/8000, real env return = -115.66


 38%|█████████████▍                     | 3071/8000 [4:22:40<3:06:08,  2.27s/it]

Episode 3071/8000, real env return = 182.28


 39%|█████████████▍                     | 3081/8000 [4:23:04<3:08:07,  2.29s/it]

Episode 3081/8000, real env return = 221.10


 39%|█████████████▌                     | 3091/8000 [4:23:31<3:37:47,  2.66s/it]

Episode 3091/8000, real env return = 212.70


 39%|█████████████▌                     | 3101/8000 [4:23:58<3:33:51,  2.62s/it]

Episode 3101/8000, real env return = 205.92


 39%|█████████████▌                     | 3111/8000 [4:24:23<3:22:10,  2.48s/it]

Episode 3111/8000, real env return = 226.40


 39%|█████████████▋                     | 3121/8000 [4:24:49<3:26:41,  2.54s/it]

Episode 3121/8000, real env return = 210.81


 39%|█████████████▋                     | 3131/8000 [4:25:16<3:29:39,  2.58s/it]

Episode 3131/8000, real env return = 248.53


 39%|█████████████▋                     | 3141/8000 [4:25:40<3:40:57,  2.73s/it]

Episode 3141/8000, real env return = 244.08


 39%|█████████████▊                     | 3151/8000 [4:26:02<3:27:25,  2.57s/it]

Episode 3151/8000, real env return = 82.58


 40%|█████████████▊                     | 3161/8000 [4:26:26<3:22:34,  2.51s/it]

Episode 3161/8000, real env return = 244.19


 40%|█████████████▊                     | 3171/8000 [4:26:52<3:12:51,  2.40s/it]

Episode 3171/8000, real env return = 240.62


 40%|█████████████▉                     | 3181/8000 [4:27:16<2:43:58,  2.04s/it]

Episode 3181/8000, real env return = 55.77


 40%|█████████████▉                     | 3191/8000 [4:27:38<3:00:18,  2.25s/it]

Episode 3191/8000, real env return = 244.99


 40%|██████████████                     | 3201/8000 [4:28:00<3:16:33,  2.46s/it]

Episode 3201/8000, real env return = 250.90


 40%|██████████████                     | 3211/8000 [4:28:26<3:35:01,  2.69s/it]

Episode 3211/8000, real env return = 248.45


 40%|██████████████                     | 3221/8000 [4:28:49<3:11:42,  2.41s/it]

Episode 3221/8000, real env return = 256.79


 40%|██████████████▏                    | 3231/8000 [4:29:14<3:25:29,  2.59s/it]

Episode 3231/8000, real env return = 257.98


 41%|██████████████▏                    | 3241/8000 [4:29:41<3:42:53,  2.81s/it]

Episode 3241/8000, real env return = 253.70


 41%|██████████████▏                    | 3251/8000 [4:30:05<2:58:15,  2.25s/it]

Episode 3251/8000, real env return = 259.35


 41%|██████████████▎                    | 3261/8000 [4:30:25<3:00:26,  2.28s/it]

Episode 3261/8000, real env return = 246.98


 41%|██████████████▎                    | 3271/8000 [4:30:47<2:47:48,  2.13s/it]

Episode 3271/8000, real env return = -107.40


 41%|██████████████▎                    | 3281/8000 [4:31:11<2:59:24,  2.28s/it]

Episode 3281/8000, real env return = 253.35


 41%|██████████████▍                    | 3291/8000 [4:31:33<2:54:47,  2.23s/it]

Episode 3291/8000, real env return = 252.32


 41%|██████████████▍                    | 3301/8000 [4:31:58<3:24:13,  2.61s/it]

Episode 3301/8000, real env return = 257.89


 41%|██████████████▍                    | 3311/8000 [4:32:26<3:47:28,  2.91s/it]

Episode 3311/8000, real env return = 264.12


 42%|██████████████▌                    | 3321/8000 [4:32:49<2:55:09,  2.25s/it]

Episode 3321/8000, real env return = 257.62


 42%|██████████████▌                    | 3331/8000 [4:33:15<3:32:58,  2.74s/it]

Episode 3331/8000, real env return = 253.53


 42%|██████████████▌                    | 3341/8000 [4:33:39<3:01:29,  2.34s/it]

Episode 3341/8000, real env return = 260.12


 42%|██████████████▋                    | 3351/8000 [4:34:04<3:15:57,  2.53s/it]

Episode 3351/8000, real env return = 255.01


 42%|██████████████▋                    | 3361/8000 [4:34:29<3:26:20,  2.67s/it]

Episode 3361/8000, real env return = 253.25


 42%|██████████████▋                    | 3371/8000 [4:34:53<3:05:10,  2.40s/it]

Episode 3371/8000, real env return = 249.08


 42%|██████████████▊                    | 3381/8000 [4:35:19<3:18:22,  2.58s/it]

Episode 3381/8000, real env return = 252.21


 42%|██████████████▊                    | 3391/8000 [4:35:45<3:16:48,  2.56s/it]

Episode 3391/8000, real env return = 249.77


 43%|██████████████▉                    | 3401/8000 [4:36:09<2:51:40,  2.24s/it]

Episode 3401/8000, real env return = 255.45


 43%|██████████████▉                    | 3411/8000 [4:36:35<3:16:06,  2.56s/it]

Episode 3411/8000, real env return = 254.55


 43%|██████████████▉                    | 3421/8000 [4:36:56<2:53:14,  2.27s/it]

Episode 3421/8000, real env return = 253.96


 43%|███████████████                    | 3431/8000 [4:37:18<2:48:31,  2.21s/it]

Episode 3431/8000, real env return = 257.64


 43%|███████████████                    | 3441/8000 [4:37:42<3:00:01,  2.37s/it]

Episode 3441/8000, real env return = 249.47


 43%|███████████████                    | 3451/8000 [4:38:06<2:44:15,  2.17s/it]

Episode 3451/8000, real env return = 249.89


 43%|███████████████▏                   | 3461/8000 [4:38:30<3:08:28,  2.49s/it]

Episode 3461/8000, real env return = 251.45


 43%|███████████████▏                   | 3471/8000 [4:38:54<2:49:11,  2.24s/it]

Episode 3471/8000, real env return = 258.16


 44%|███████████████▏                   | 3481/8000 [4:39:15<2:43:21,  2.17s/it]

Episode 3481/8000, real env return = 254.62


 44%|███████████████▎                   | 3491/8000 [4:39:40<3:21:44,  2.68s/it]

Episode 3491/8000, real env return = 60.42


 44%|███████████████▎                   | 3501/8000 [4:40:05<3:12:05,  2.56s/it]

Episode 3501/8000, real env return = 253.33


 44%|███████████████▎                   | 3511/8000 [4:40:29<3:07:13,  2.50s/it]

Episode 3511/8000, real env return = 251.25


 44%|███████████████▍                   | 3521/8000 [4:40:54<3:20:06,  2.68s/it]

Episode 3521/8000, real env return = 254.63


 44%|███████████████▍                   | 3531/8000 [4:41:16<2:42:23,  2.18s/it]

Episode 3531/8000, real env return = 260.15


 44%|███████████████▍                   | 3541/8000 [4:41:39<2:48:35,  2.27s/it]

Episode 3541/8000, real env return = 257.44


 44%|███████████████▌                   | 3551/8000 [4:42:05<2:55:44,  2.37s/it]

Episode 3551/8000, real env return = 252.52


 45%|███████████████▌                   | 3561/8000 [4:42:30<3:12:29,  2.60s/it]

Episode 3561/8000, real env return = 26.75


 45%|███████████████▌                   | 3571/8000 [4:42:55<2:42:36,  2.20s/it]

Episode 3571/8000, real env return = 261.40


 45%|███████████████▋                   | 3581/8000 [4:43:21<3:03:00,  2.48s/it]

Episode 3581/8000, real env return = 252.85


 45%|███████████████▋                   | 3591/8000 [4:43:45<3:10:57,  2.60s/it]

Episode 3591/8000, real env return = 257.19


 45%|███████████████▊                   | 3601/8000 [4:44:12<3:24:04,  2.78s/it]

Episode 3601/8000, real env return = 253.02


 45%|███████████████▊                   | 3611/8000 [4:44:38<3:00:25,  2.47s/it]

Episode 3611/8000, real env return = 251.35


 45%|███████████████▊                   | 3621/8000 [4:45:04<3:18:19,  2.72s/it]

Episode 3621/8000, real env return = 257.04


 45%|███████████████▉                   | 3631/8000 [4:45:27<3:02:15,  2.50s/it]

Episode 3631/8000, real env return = 132.41


 46%|███████████████▉                   | 3641/8000 [4:45:50<2:43:12,  2.25s/it]

Episode 3641/8000, real env return = 257.04


 46%|███████████████▉                   | 3651/8000 [4:46:13<2:50:05,  2.35s/it]

Episode 3651/8000, real env return = 259.63


 46%|████████████████                   | 3661/8000 [4:46:38<2:52:48,  2.39s/it]

Episode 3661/8000, real env return = 261.98


 46%|████████████████                   | 3671/8000 [4:47:01<2:58:05,  2.47s/it]

Episode 3671/8000, real env return = 254.24


 46%|████████████████                   | 3681/8000 [4:47:24<2:49:10,  2.35s/it]

Episode 3681/8000, real env return = 262.43


 46%|████████████████▏                  | 3691/8000 [4:47:47<2:53:13,  2.41s/it]

Episode 3691/8000, real env return = 263.91


 46%|████████████████▏                  | 3701/8000 [4:48:12<3:04:28,  2.57s/it]

Episode 3701/8000, real env return = 255.99


 46%|████████████████▏                  | 3711/8000 [4:48:39<3:21:28,  2.82s/it]

Episode 3711/8000, real env return = 260.72


 47%|████████████████▎                  | 3721/8000 [4:48:59<2:15:12,  1.90s/it]

Episode 3721/8000, real env return = -114.17


 47%|████████████████▎                  | 3731/8000 [4:49:22<2:56:00,  2.47s/it]

Episode 3731/8000, real env return = 257.81


 47%|████████████████▎                  | 3741/8000 [4:49:46<2:58:59,  2.52s/it]

Episode 3741/8000, real env return = 256.46


 47%|████████████████▍                  | 3751/8000 [4:50:09<2:58:20,  2.52s/it]

Episode 3751/8000, real env return = 256.82


 47%|████████████████▍                  | 3761/8000 [4:50:34<2:56:36,  2.50s/it]

Episode 3761/8000, real env return = 248.62


 47%|████████████████▍                  | 3771/8000 [4:50:59<2:42:43,  2.31s/it]

Episode 3771/8000, real env return = 251.83


 47%|████████████████▌                  | 3781/8000 [4:51:24<2:42:34,  2.31s/it]

Episode 3781/8000, real env return = 254.52


 47%|████████████████▌                  | 3791/8000 [4:51:49<2:47:38,  2.39s/it]

Episode 3791/8000, real env return = 260.33


 48%|████████████████▋                  | 3801/8000 [4:52:12<2:48:58,  2.41s/it]

Episode 3801/8000, real env return = 254.84


 48%|████████████████▋                  | 3811/8000 [4:52:39<2:58:03,  2.55s/it]

Episode 3811/8000, real env return = 256.99


 48%|████████████████▋                  | 3821/8000 [4:53:04<2:55:19,  2.52s/it]

Episode 3821/8000, real env return = 259.29


 48%|████████████████▊                  | 3831/8000 [4:53:30<3:07:53,  2.70s/it]

Episode 3831/8000, real env return = 258.39


 48%|████████████████▊                  | 3841/8000 [4:53:55<2:41:53,  2.34s/it]

Episode 3841/8000, real env return = 119.24


 48%|████████████████▊                  | 3851/8000 [4:54:20<3:14:03,  2.81s/it]

Episode 3851/8000, real env return = 261.46


 48%|████████████████▉                  | 3861/8000 [4:54:46<2:58:55,  2.59s/it]

Episode 3861/8000, real env return = 264.31


 48%|████████████████▉                  | 3871/8000 [4:55:10<2:51:32,  2.49s/it]

Episode 3871/8000, real env return = 259.11


 49%|████████████████▉                  | 3881/8000 [4:55:36<3:03:02,  2.67s/it]

Episode 3881/8000, real env return = 252.72


 49%|█████████████████                  | 3891/8000 [4:56:00<2:39:49,  2.33s/it]

Episode 3891/8000, real env return = 256.18


 49%|█████████████████                  | 3901/8000 [4:56:25<3:02:02,  2.66s/it]

Episode 3901/8000, real env return = 259.22


 49%|█████████████████                  | 3911/8000 [4:56:50<2:51:29,  2.52s/it]

Episode 3911/8000, real env return = 262.49


 49%|█████████████████▏                 | 3921/8000 [4:57:14<2:33:55,  2.26s/it]

Episode 3921/8000, real env return = -46.49


 49%|█████████████████▏                 | 3931/8000 [4:57:36<2:16:51,  2.02s/it]

Episode 3931/8000, real env return = 262.96


 49%|█████████████████▏                 | 3941/8000 [4:58:01<2:52:49,  2.55s/it]

Episode 3941/8000, real env return = 258.10


 49%|█████████████████▎                 | 3951/8000 [4:58:26<2:47:39,  2.48s/it]

Episode 3951/8000, real env return = 258.63


 50%|█████████████████▎                 | 3961/8000 [4:58:49<2:33:54,  2.29s/it]

Episode 3961/8000, real env return = 266.96


 50%|█████████████████▎                 | 3971/8000 [4:59:13<2:28:26,  2.21s/it]

Episode 3971/8000, real env return = 255.58


 50%|█████████████████▍                 | 3981/8000 [4:59:37<2:37:46,  2.36s/it]

Episode 3981/8000, real env return = 260.49


 50%|█████████████████▍                 | 3991/8000 [5:00:02<2:58:29,  2.67s/it]

Episode 3991/8000, real env return = 266.62


 50%|█████████████████▌                 | 4001/8000 [5:00:25<2:32:46,  2.29s/it]

Episode 4001/8000, real env return = 255.48


 50%|█████████████████▌                 | 4011/8000 [5:00:49<2:46:46,  2.51s/it]

Episode 4011/8000, real env return = 265.19


 50%|█████████████████▌                 | 4021/8000 [5:01:12<2:24:36,  2.18s/it]

Episode 4021/8000, real env return = 257.08


 50%|█████████████████▋                 | 4031/8000 [5:01:38<3:02:04,  2.75s/it]

Episode 4031/8000, real env return = 264.41


 51%|█████████████████▋                 | 4041/8000 [5:02:00<2:22:41,  2.16s/it]

Episode 4041/8000, real env return = 258.00


 51%|█████████████████▋                 | 4051/8000 [5:02:23<2:28:36,  2.26s/it]

Episode 4051/8000, real env return = 266.68


 51%|█████████████████▊                 | 4061/8000 [5:02:47<2:30:13,  2.29s/it]

Episode 4061/8000, real env return = 258.83


 51%|█████████████████▊                 | 4071/8000 [5:03:11<2:30:46,  2.30s/it]

Episode 4071/8000, real env return = 260.56


 51%|█████████████████▊                 | 4081/8000 [5:03:38<2:39:04,  2.44s/it]

Episode 4081/8000, real env return = 256.67


 51%|█████████████████▉                 | 4091/8000 [5:04:04<3:00:48,  2.78s/it]

Episode 4091/8000, real env return = 261.81


 51%|█████████████████▉                 | 4101/8000 [5:04:25<2:38:34,  2.44s/it]

Episode 4101/8000, real env return = 35.47


 51%|█████████████████▉                 | 4111/8000 [5:04:49<2:34:35,  2.38s/it]

Episode 4111/8000, real env return = 265.85


 52%|██████████████████                 | 4121/8000 [5:05:14<2:43:19,  2.53s/it]

Episode 4121/8000, real env return = 251.89


 52%|██████████████████                 | 4131/8000 [5:05:37<2:46:21,  2.58s/it]

Episode 4131/8000, real env return = 257.60


 52%|██████████████████                 | 4141/8000 [5:06:01<2:49:25,  2.63s/it]

Episode 4141/8000, real env return = 263.30


 52%|██████████████████▏                | 4151/8000 [5:06:27<2:38:09,  2.47s/it]

Episode 4151/8000, real env return = 264.66


 52%|██████████████████▏                | 4161/8000 [5:06:54<2:37:03,  2.45s/it]

Episode 4161/8000, real env return = 262.33


 52%|██████████████████▏                | 4171/8000 [5:07:17<2:26:47,  2.30s/it]

Episode 4171/8000, real env return = 265.06


 52%|██████████████████▎                | 4181/8000 [5:07:42<2:37:47,  2.48s/it]

Episode 4181/8000, real env return = 269.28


 52%|██████████████████▎                | 4191/8000 [5:08:04<2:24:46,  2.28s/it]

Episode 4191/8000, real env return = 268.76


 53%|██████████████████▍                | 4201/8000 [5:08:30<2:29:39,  2.36s/it]

Episode 4201/8000, real env return = 262.80


 53%|██████████████████▍                | 4211/8000 [5:08:53<2:27:37,  2.34s/it]

Episode 4211/8000, real env return = 271.40


 53%|██████████████████▍                | 4221/8000 [5:09:18<2:35:50,  2.47s/it]

Episode 4221/8000, real env return = 269.54


 53%|██████████████████▌                | 4231/8000 [5:09:44<2:45:03,  2.63s/it]

Episode 4231/8000, real env return = 264.21


 53%|██████████████████▌                | 4241/8000 [5:10:10<2:48:36,  2.69s/it]

Episode 4241/8000, real env return = 260.28


 53%|██████████████████▌                | 4251/8000 [5:10:33<2:16:32,  2.19s/it]

Episode 4251/8000, real env return = 265.51


 53%|██████████████████▋                | 4261/8000 [5:10:57<2:23:29,  2.30s/it]

Episode 4261/8000, real env return = 260.14


 53%|██████████████████▋                | 4271/8000 [5:11:19<2:22:07,  2.29s/it]

Episode 4271/8000, real env return = 272.55


 54%|██████████████████▋                | 4281/8000 [5:11:43<2:09:24,  2.09s/it]

Episode 4281/8000, real env return = 257.91


 54%|██████████████████▊                | 4291/8000 [5:12:09<2:48:14,  2.72s/it]

Episode 4291/8000, real env return = 254.86


 54%|██████████████████▊                | 4301/8000 [5:12:34<2:24:59,  2.35s/it]

Episode 4301/8000, real env return = 267.01


 54%|██████████████████▊                | 4311/8000 [5:12:56<2:07:32,  2.07s/it]

Episode 4311/8000, real env return = 270.41


 54%|██████████████████▉                | 4321/8000 [5:13:21<2:31:03,  2.46s/it]

Episode 4321/8000, real env return = 255.51


 54%|██████████████████▉                | 4331/8000 [5:13:47<2:47:51,  2.75s/it]

Episode 4331/8000, real env return = 267.67


 54%|██████████████████▉                | 4341/8000 [5:14:09<2:13:08,  2.18s/it]

Episode 4341/8000, real env return = 259.28


 54%|███████████████████                | 4351/8000 [5:14:32<2:09:29,  2.13s/it]

Episode 4351/8000, real env return = 276.63


 55%|███████████████████                | 4361/8000 [5:14:55<2:15:57,  2.24s/it]

Episode 4361/8000, real env return = 267.16


 55%|███████████████████                | 4371/8000 [5:15:19<2:05:21,  2.07s/it]

Episode 4371/8000, real env return = 268.16


 55%|███████████████████▏               | 4381/8000 [5:15:45<2:32:43,  2.53s/it]

Episode 4381/8000, real env return = 267.75


 55%|███████████████████▏               | 4391/8000 [5:16:08<2:22:48,  2.37s/it]

Episode 4391/8000, real env return = 276.81


 55%|███████████████████▎               | 4401/8000 [5:16:31<2:31:32,  2.53s/it]

Episode 4401/8000, real env return = 263.55


 55%|███████████████████▎               | 4411/8000 [5:16:54<2:21:17,  2.36s/it]

Episode 4411/8000, real env return = 265.28


 55%|███████████████████▎               | 4421/8000 [5:17:17<2:15:28,  2.27s/it]

Episode 4421/8000, real env return = 270.15


 55%|███████████████████▍               | 4431/8000 [5:17:42<2:33:32,  2.58s/it]

Episode 4431/8000, real env return = 268.46


 56%|███████████████████▍               | 4441/8000 [5:18:07<2:22:22,  2.40s/it]

Episode 4441/8000, real env return = 267.67


 56%|███████████████████▍               | 4451/8000 [5:18:32<2:35:10,  2.62s/it]

Episode 4451/8000, real env return = 271.28


 56%|███████████████████▌               | 4461/8000 [5:18:55<1:59:14,  2.02s/it]

Episode 4461/8000, real env return = 268.52


 56%|███████████████████▌               | 4471/8000 [5:19:18<2:25:08,  2.47s/it]

Episode 4471/8000, real env return = 259.46


 56%|███████████████████▌               | 4481/8000 [5:19:41<2:12:45,  2.26s/it]

Episode 4481/8000, real env return = 267.45


 56%|███████████████████▋               | 4491/8000 [5:20:07<2:35:04,  2.65s/it]

Episode 4491/8000, real env return = 270.09


 56%|███████████████████▋               | 4501/8000 [5:20:33<2:37:04,  2.69s/it]

Episode 4501/8000, real env return = 278.18


 56%|███████████████████▋               | 4511/8000 [5:20:55<2:12:00,  2.27s/it]

Episode 4511/8000, real env return = 279.05


 57%|███████████████████▊               | 4521/8000 [5:21:20<2:21:05,  2.43s/it]

Episode 4521/8000, real env return = 266.35


 57%|███████████████████▊               | 4531/8000 [5:21:45<2:21:49,  2.45s/it]

Episode 4531/8000, real env return = -46.42


 57%|███████████████████▊               | 4541/8000 [5:22:07<2:06:32,  2.19s/it]

Episode 4541/8000, real env return = 273.84


 57%|███████████████████▉               | 4551/8000 [5:22:30<2:09:35,  2.25s/it]

Episode 4551/8000, real env return = 284.81


 57%|███████████████████▉               | 4561/8000 [5:22:52<2:12:15,  2.31s/it]

Episode 4561/8000, real env return = 275.08


 57%|███████████████████▉               | 4571/8000 [5:23:14<2:10:18,  2.28s/it]

Episode 4571/8000, real env return = 273.96


 57%|████████████████████               | 4581/8000 [5:23:38<2:14:47,  2.37s/it]

Episode 4581/8000, real env return = 277.49


 57%|████████████████████               | 4591/8000 [5:24:00<2:12:39,  2.33s/it]

Episode 4591/8000, real env return = 276.17


 58%|████████████████████▏              | 4601/8000 [5:24:24<2:07:25,  2.25s/it]

Episode 4601/8000, real env return = 280.17


 58%|████████████████████▏              | 4611/8000 [5:24:50<2:10:05,  2.30s/it]

Episode 4611/8000, real env return = 271.70


 58%|████████████████████▏              | 4621/8000 [5:25:15<2:12:59,  2.36s/it]

Episode 4621/8000, real env return = 268.91


 58%|████████████████████▎              | 4631/8000 [5:25:39<2:23:06,  2.55s/it]

Episode 4631/8000, real env return = 276.31


 58%|████████████████████▎              | 4641/8000 [5:26:04<2:28:28,  2.65s/it]

Episode 4641/8000, real env return = 276.43


 58%|████████████████████▎              | 4651/8000 [5:26:28<2:12:30,  2.37s/it]

Episode 4651/8000, real env return = 264.71


 58%|████████████████████▍              | 4661/8000 [5:26:51<2:13:01,  2.39s/it]

Episode 4661/8000, real env return = 272.38


 58%|████████████████████▍              | 4671/8000 [5:27:17<2:15:34,  2.44s/it]

Episode 4671/8000, real env return = 273.75


 59%|████████████████████▍              | 4681/8000 [5:27:40<2:10:08,  2.35s/it]

Episode 4681/8000, real env return = 276.65


 59%|████████████████████▌              | 4691/8000 [5:28:04<2:15:39,  2.46s/it]

Episode 4691/8000, real env return = 276.26


 59%|████████████████████▌              | 4701/8000 [5:28:30<2:17:27,  2.50s/it]

Episode 4701/8000, real env return = 270.14


 59%|████████████████████▌              | 4711/8000 [5:28:52<2:00:31,  2.20s/it]

Episode 4711/8000, real env return = 275.45


 59%|████████████████████▋              | 4721/8000 [5:29:17<2:14:43,  2.47s/it]

Episode 4721/8000, real env return = 273.66


 59%|████████████████████▋              | 4731/8000 [5:29:41<2:09:13,  2.37s/it]

Episode 4731/8000, real env return = 279.46


 59%|████████████████████▋              | 4741/8000 [5:30:03<2:00:02,  2.21s/it]

Episode 4741/8000, real env return = 274.27


 59%|████████████████████▊              | 4751/8000 [5:30:29<2:19:43,  2.58s/it]

Episode 4751/8000, real env return = 272.03


 60%|████████████████████▊              | 4761/8000 [5:30:54<2:23:49,  2.66s/it]

Episode 4761/8000, real env return = 274.36


 60%|████████████████████▊              | 4771/8000 [5:31:17<2:08:22,  2.39s/it]

Episode 4771/8000, real env return = 275.64


 60%|████████████████████▉              | 4781/8000 [5:31:40<1:50:30,  2.06s/it]

Episode 4781/8000, real env return = 281.48


 60%|████████████████████▉              | 4791/8000 [5:32:03<2:14:22,  2.51s/it]

Episode 4791/8000, real env return = 274.32


 60%|█████████████████████              | 4801/8000 [5:32:28<2:09:39,  2.43s/it]

Episode 4801/8000, real env return = 279.71


 60%|█████████████████████              | 4811/8000 [5:32:51<2:07:46,  2.40s/it]

Episode 4811/8000, real env return = 281.30


 60%|█████████████████████              | 4821/8000 [5:33:13<1:55:20,  2.18s/it]

Episode 4821/8000, real env return = 279.85


 60%|█████████████████████▏             | 4831/8000 [5:33:37<1:54:41,  2.17s/it]

Episode 4831/8000, real env return = 276.67


 61%|█████████████████████▏             | 4841/8000 [5:34:01<2:07:37,  2.42s/it]

Episode 4841/8000, real env return = 286.19


 61%|█████████████████████▏             | 4851/8000 [5:34:26<2:07:04,  2.42s/it]

Episode 4851/8000, real env return = 280.20


 61%|█████████████████████▎             | 4861/8000 [5:34:51<2:12:03,  2.52s/it]

Episode 4861/8000, real env return = 277.99


 61%|█████████████████████▎             | 4871/8000 [5:35:14<1:53:14,  2.17s/it]

Episode 4871/8000, real env return = 275.96


 61%|█████████████████████▎             | 4881/8000 [5:35:37<1:59:31,  2.30s/it]

Episode 4881/8000, real env return = 281.95


 61%|█████████████████████▍             | 4891/8000 [5:36:00<2:04:46,  2.41s/it]

Episode 4891/8000, real env return = 277.41


 61%|█████████████████████▍             | 4901/8000 [5:36:26<2:12:40,  2.57s/it]

Episode 4901/8000, real env return = 281.43


 61%|█████████████████████▍             | 4911/8000 [5:36:51<1:53:26,  2.20s/it]

Episode 4911/8000, real env return = 280.78


 62%|█████████████████████▌             | 4921/8000 [5:37:15<2:05:08,  2.44s/it]

Episode 4921/8000, real env return = 270.54


 62%|█████████████████████▌             | 4931/8000 [5:37:39<1:49:46,  2.15s/it]

Episode 4931/8000, real env return = 276.47


 62%|█████████████████████▌             | 4941/8000 [5:38:03<2:02:06,  2.39s/it]

Episode 4941/8000, real env return = 279.97


 62%|█████████████████████▋             | 4951/8000 [5:38:25<1:35:00,  1.87s/it]

Episode 4951/8000, real env return = 284.58


 62%|█████████████████████▋             | 4961/8000 [5:38:47<1:59:39,  2.36s/it]

Episode 4961/8000, real env return = 276.77


 62%|█████████████████████▋             | 4971/8000 [5:39:10<1:59:47,  2.37s/it]

Episode 4971/8000, real env return = 279.39


 62%|█████████████████████▊             | 4981/8000 [5:39:33<1:57:48,  2.34s/it]

Episode 4981/8000, real env return = 273.88


 62%|█████████████████████▊             | 4991/8000 [5:39:56<1:50:00,  2.19s/it]

Episode 4991/8000, real env return = 284.13


 63%|█████████████████████▉             | 5001/8000 [5:40:21<2:01:20,  2.43s/it]

Episode 5001/8000, real env return = 281.00


 63%|█████████████████████▉             | 5011/8000 [5:40:44<2:01:28,  2.44s/it]

Episode 5011/8000, real env return = 275.31


 63%|█████████████████████▉             | 5021/8000 [5:41:06<1:37:12,  1.96s/it]

Episode 5021/8000, real env return = 281.55


 63%|██████████████████████             | 5031/8000 [5:41:32<2:07:56,  2.59s/it]

Episode 5031/8000, real env return = 281.91


 63%|██████████████████████             | 5041/8000 [5:41:55<1:52:36,  2.28s/it]

Episode 5041/8000, real env return = 279.34


 63%|██████████████████████             | 5051/8000 [5:42:18<1:42:04,  2.08s/it]

Episode 5051/8000, real env return = 276.80


 63%|██████████████████████▏            | 5061/8000 [5:42:41<1:42:17,  2.09s/it]

Episode 5061/8000, real env return = 278.32


 63%|██████████████████████▏            | 5071/8000 [5:43:04<1:44:09,  2.13s/it]

Episode 5071/8000, real env return = 281.45


 64%|██████████████████████▏            | 5081/8000 [5:43:29<2:04:00,  2.55s/it]

Episode 5081/8000, real env return = 282.43


 64%|██████████████████████▎            | 5091/8000 [5:43:55<2:06:19,  2.61s/it]

Episode 5091/8000, real env return = 283.70


 64%|██████████████████████▎            | 5101/8000 [5:44:20<1:56:51,  2.42s/it]

Episode 5101/8000, real env return = 283.89


 64%|██████████████████████▎            | 5111/8000 [5:44:42<1:40:26,  2.09s/it]

Episode 5111/8000, real env return = 283.92


 64%|██████████████████████▍            | 5121/8000 [5:45:06<1:59:59,  2.50s/it]

Episode 5121/8000, real env return = 276.19


 64%|██████████████████████▍            | 5131/8000 [5:45:29<1:47:37,  2.25s/it]

Episode 5131/8000, real env return = 279.86


 64%|██████████████████████▍            | 5141/8000 [5:45:54<1:56:46,  2.45s/it]

Episode 5141/8000, real env return = 285.24


 64%|██████████████████████▌            | 5151/8000 [5:46:16<1:44:53,  2.21s/it]

Episode 5151/8000, real env return = 282.08


 65%|██████████████████████▌            | 5161/8000 [5:46:38<1:48:28,  2.29s/it]

Episode 5161/8000, real env return = 280.92


 65%|██████████████████████▌            | 5171/8000 [5:47:02<1:52:48,  2.39s/it]

Episode 5171/8000, real env return = 277.30


 65%|██████████████████████▋            | 5181/8000 [5:47:24<1:45:42,  2.25s/it]

Episode 5181/8000, real env return = 285.89


 65%|██████████████████████▋            | 5191/8000 [5:47:44<1:35:32,  2.04s/it]

Episode 5191/8000, real env return = 282.88


 65%|██████████████████████▊            | 5201/8000 [5:48:09<1:50:06,  2.36s/it]

Episode 5201/8000, real env return = 278.69


 65%|██████████████████████▊            | 5211/8000 [5:48:33<1:57:07,  2.52s/it]

Episode 5211/8000, real env return = 278.38


 65%|██████████████████████▊            | 5221/8000 [5:48:56<1:49:38,  2.37s/it]

Episode 5221/8000, real env return = 286.46


 65%|██████████████████████▉            | 5231/8000 [5:49:17<1:46:25,  2.31s/it]

Episode 5231/8000, real env return = 283.30


 66%|██████████████████████▉            | 5241/8000 [5:49:41<1:39:52,  2.17s/it]

Episode 5241/8000, real env return = 277.97


 66%|██████████████████████▉            | 5251/8000 [5:50:04<1:49:53,  2.40s/it]

Episode 5251/8000, real env return = 283.83


 66%|███████████████████████            | 5261/8000 [5:50:27<1:39:33,  2.18s/it]

Episode 5261/8000, real env return = 277.26


 66%|███████████████████████            | 5271/8000 [5:50:53<1:57:46,  2.59s/it]

Episode 5271/8000, real env return = 283.59


 66%|███████████████████████            | 5281/8000 [5:51:15<1:38:00,  2.16s/it]

Episode 5281/8000, real env return = 279.01


 66%|███████████████████████▏           | 5291/8000 [5:51:41<1:51:02,  2.46s/it]

Episode 5291/8000, real env return = 280.90


 66%|███████████████████████▏           | 5301/8000 [5:52:04<1:46:02,  2.36s/it]

Episode 5301/8000, real env return = 283.29


 66%|███████████████████████▏           | 5311/8000 [5:52:27<1:49:52,  2.45s/it]

Episode 5311/8000, real env return = 277.62


 67%|███████████████████████▎           | 5321/8000 [5:52:52<1:51:22,  2.49s/it]

Episode 5321/8000, real env return = 285.19


 67%|███████████████████████▎           | 5331/8000 [5:53:14<1:39:16,  2.23s/it]

Episode 5331/8000, real env return = 284.26


 67%|███████████████████████▎           | 5341/8000 [5:53:37<1:38:33,  2.22s/it]

Episode 5341/8000, real env return = 279.02


 67%|███████████████████████▍           | 5351/8000 [5:54:02<1:52:25,  2.55s/it]

Episode 5351/8000, real env return = 279.29


 67%|███████████████████████▍           | 5361/8000 [5:54:25<1:42:15,  2.32s/it]

Episode 5361/8000, real env return = 283.53


 67%|███████████████████████▍           | 5371/8000 [5:54:48<1:34:46,  2.16s/it]

Episode 5371/8000, real env return = 277.05


 67%|███████████████████████▌           | 5381/8000 [5:55:12<1:48:18,  2.48s/it]

Episode 5381/8000, real env return = 282.51


 67%|███████████████████████▌           | 5391/8000 [5:55:37<1:46:56,  2.46s/it]

Episode 5391/8000, real env return = 282.40


 68%|███████████████████████▋           | 5401/8000 [5:56:03<1:44:56,  2.42s/it]

Episode 5401/8000, real env return = 280.66


 68%|███████████████████████▋           | 5411/8000 [5:56:26<1:36:08,  2.23s/it]

Episode 5411/8000, real env return = 281.68


 68%|███████████████████████▋           | 5421/8000 [5:56:50<1:37:47,  2.27s/it]

Episode 5421/8000, real env return = 288.09


 68%|███████████████████████▊           | 5431/8000 [5:57:13<1:33:57,  2.19s/it]

Episode 5431/8000, real env return = 280.07


 68%|███████████████████████▊           | 5441/8000 [5:57:35<1:40:07,  2.35s/it]

Episode 5441/8000, real env return = 282.94


 68%|███████████████████████▊           | 5451/8000 [5:57:58<1:31:53,  2.16s/it]

Episode 5451/8000, real env return = 284.72


 68%|███████████████████████▉           | 5461/8000 [5:58:22<1:40:34,  2.38s/it]

Episode 5461/8000, real env return = 282.23


 68%|███████████████████████▉           | 5471/8000 [5:58:45<1:40:21,  2.38s/it]

Episode 5471/8000, real env return = 281.71


 69%|███████████████████████▉           | 5481/8000 [5:59:07<1:31:21,  2.18s/it]

Episode 5481/8000, real env return = 279.77


 69%|████████████████████████           | 5491/8000 [5:59:32<1:37:59,  2.34s/it]

Episode 5491/8000, real env return = 280.87


 69%|████████████████████████           | 5501/8000 [5:59:56<1:43:48,  2.49s/it]

Episode 5501/8000, real env return = 282.54


 69%|████████████████████████           | 5511/8000 [6:00:20<1:34:30,  2.28s/it]

Episode 5511/8000, real env return = 282.33


 69%|████████████████████████▏          | 5521/8000 [6:00:43<1:33:27,  2.26s/it]

Episode 5521/8000, real env return = 285.04


 69%|████████████████████████▏          | 5531/8000 [6:01:07<1:43:42,  2.52s/it]

Episode 5531/8000, real env return = 281.96


 69%|████████████████████████▏          | 5541/8000 [6:01:32<1:45:06,  2.56s/it]

Episode 5541/8000, real env return = 284.92


 69%|████████████████████████▎          | 5551/8000 [6:01:57<1:38:12,  2.41s/it]

Episode 5551/8000, real env return = 280.63


 70%|████████████████████████▎          | 5561/8000 [6:02:20<1:32:38,  2.28s/it]

Episode 5561/8000, real env return = 284.29


 70%|████████████████████████▎          | 5571/8000 [6:02:43<1:34:09,  2.33s/it]

Episode 5571/8000, real env return = 282.58


 70%|████████████████████████▍          | 5581/8000 [6:03:06<1:34:55,  2.35s/it]

Episode 5581/8000, real env return = 285.44


 70%|████████████████████████▍          | 5591/8000 [6:03:30<1:42:10,  2.54s/it]

Episode 5591/8000, real env return = 289.37


 70%|████████████████████████▌          | 5601/8000 [6:03:52<1:31:49,  2.30s/it]

Episode 5601/8000, real env return = 282.11


 70%|████████████████████████▌          | 5611/8000 [6:04:16<1:33:36,  2.35s/it]

Episode 5611/8000, real env return = 281.38


 70%|████████████████████████▌          | 5621/8000 [6:04:39<1:31:23,  2.31s/it]

Episode 5621/8000, real env return = 283.31


 70%|████████████████████████▋          | 5631/8000 [6:05:03<1:33:12,  2.36s/it]

Episode 5631/8000, real env return = 286.32


 71%|████████████████████████▋          | 5641/8000 [6:05:27<1:32:12,  2.35s/it]

Episode 5641/8000, real env return = 285.78


 71%|████████████████████████▋          | 5651/8000 [6:05:51<1:35:39,  2.44s/it]

Episode 5651/8000, real env return = 286.89


 71%|████████████████████████▊          | 5661/8000 [6:06:13<1:23:32,  2.14s/it]

Episode 5661/8000, real env return = 285.68


 71%|████████████████████████▊          | 5671/8000 [6:06:36<1:25:29,  2.20s/it]

Episode 5671/8000, real env return = 285.14


 71%|████████████████████████▊          | 5681/8000 [6:07:01<1:37:31,  2.52s/it]

Episode 5681/8000, real env return = 284.59


 71%|████████████████████████▉          | 5691/8000 [6:07:25<1:41:00,  2.62s/it]

Episode 5691/8000, real env return = 281.88


 71%|████████████████████████▉          | 5701/8000 [6:07:46<1:17:26,  2.02s/it]

Episode 5701/8000, real env return = 288.26


 71%|████████████████████████▉          | 5711/8000 [6:08:10<1:30:36,  2.38s/it]

Episode 5711/8000, real env return = 288.33


 72%|█████████████████████████          | 5721/8000 [6:08:31<1:24:30,  2.23s/it]

Episode 5721/8000, real env return = 288.52


 72%|█████████████████████████          | 5731/8000 [6:08:53<1:24:39,  2.24s/it]

Episode 5731/8000, real env return = 283.47


 72%|█████████████████████████          | 5741/8000 [6:09:16<1:26:30,  2.30s/it]

Episode 5741/8000, real env return = 280.71


 72%|█████████████████████████▏         | 5751/8000 [6:09:40<1:34:13,  2.51s/it]

Episode 5751/8000, real env return = 283.97


 72%|█████████████████████████▏         | 5761/8000 [6:10:04<1:22:59,  2.22s/it]

Episode 5761/8000, real env return = 285.81


 72%|█████████████████████████▏         | 5771/8000 [6:10:26<1:25:04,  2.29s/it]

Episode 5771/8000, real env return = 284.76


 72%|█████████████████████████▎         | 5781/8000 [6:10:51<1:33:47,  2.54s/it]

Episode 5781/8000, real env return = 284.27


 72%|█████████████████████████▎         | 5791/8000 [6:11:10<1:11:15,  1.94s/it]

Episode 5791/8000, real env return = 282.34


 73%|█████████████████████████▍         | 5801/8000 [6:11:31<1:17:17,  2.11s/it]

Episode 5801/8000, real env return = 283.82


 73%|█████████████████████████▍         | 5811/8000 [6:11:53<1:22:28,  2.26s/it]

Episode 5811/8000, real env return = 286.16


 73%|█████████████████████████▍         | 5821/8000 [6:12:16<1:29:09,  2.46s/it]

Episode 5821/8000, real env return = 285.59


 73%|█████████████████████████▌         | 5831/8000 [6:12:39<1:16:59,  2.13s/it]

Episode 5831/8000, real env return = 287.48


 73%|█████████████████████████▌         | 5841/8000 [6:13:02<1:31:41,  2.55s/it]

Episode 5841/8000, real env return = 285.45


 73%|█████████████████████████▌         | 5851/8000 [6:13:26<1:26:46,  2.42s/it]

Episode 5851/8000, real env return = 287.26


 73%|█████████████████████████▋         | 5861/8000 [6:13:48<1:20:44,  2.26s/it]

Episode 5861/8000, real env return = 289.30


 73%|█████████████████████████▋         | 5871/8000 [6:14:10<1:13:22,  2.07s/it]

Episode 5871/8000, real env return = 280.99


 74%|█████████████████████████▋         | 5881/8000 [6:14:32<1:16:47,  2.17s/it]

Episode 5881/8000, real env return = 284.77


 74%|█████████████████████████▊         | 5891/8000 [6:14:55<1:22:12,  2.34s/it]

Episode 5891/8000, real env return = 288.23


 74%|█████████████████████████▊         | 5901/8000 [6:15:17<1:13:44,  2.11s/it]

Episode 5901/8000, real env return = 281.74


 74%|█████████████████████████▊         | 5911/8000 [6:15:40<1:26:21,  2.48s/it]

Episode 5911/8000, real env return = 292.10


 74%|█████████████████████████▉         | 5921/8000 [6:16:04<1:22:32,  2.38s/it]

Episode 5921/8000, real env return = 286.13


 74%|█████████████████████████▉         | 5931/8000 [6:16:27<1:19:27,  2.30s/it]

Episode 5931/8000, real env return = 288.46


 74%|█████████████████████████▉         | 5941/8000 [6:16:49<1:11:25,  2.08s/it]

Episode 5941/8000, real env return = 282.47


 74%|██████████████████████████         | 5951/8000 [6:17:11<1:15:48,  2.22s/it]

Episode 5951/8000, real env return = 282.16


 75%|██████████████████████████         | 5961/8000 [6:17:33<1:11:23,  2.10s/it]

Episode 5961/8000, real env return = 288.75


 75%|██████████████████████████         | 5971/8000 [6:17:57<1:25:05,  2.52s/it]

Episode 5971/8000, real env return = 285.80


 75%|██████████████████████████▏        | 5981/8000 [6:18:19<1:15:34,  2.25s/it]

Episode 5981/8000, real env return = 287.52


 75%|██████████████████████████▏        | 5991/8000 [6:18:40<1:06:02,  1.97s/it]

Episode 5991/8000, real env return = 287.96


 75%|██████████████████████████▎        | 6001/8000 [6:19:03<1:18:33,  2.36s/it]

Episode 6001/8000, real env return = 286.85


 75%|██████████████████████████▎        | 6011/8000 [6:19:25<1:09:26,  2.09s/it]

Episode 6011/8000, real env return = 289.14


 75%|██████████████████████████▎        | 6021/8000 [6:19:48<1:21:10,  2.46s/it]

Episode 6021/8000, real env return = 287.42


 75%|██████████████████████████▍        | 6031/8000 [6:20:12<1:24:17,  2.57s/it]

Episode 6031/8000, real env return = 290.24


 76%|██████████████████████████▍        | 6041/8000 [6:20:37<1:17:07,  2.36s/it]

Episode 6041/8000, real env return = 284.88


 76%|██████████████████████████▍        | 6051/8000 [6:20:59<1:12:01,  2.22s/it]

Episode 6051/8000, real env return = 285.56


 76%|██████████████████████████▌        | 6061/8000 [6:21:22<1:17:49,  2.41s/it]

Episode 6061/8000, real env return = 286.52


 76%|██████████████████████████▌        | 6071/8000 [6:21:43<1:10:42,  2.20s/it]

Episode 6071/8000, real env return = 288.12


 76%|██████████████████████████▌        | 6081/8000 [6:22:05<1:09:37,  2.18s/it]

Episode 6081/8000, real env return = 288.20


 76%|██████████████████████████▋        | 6091/8000 [6:22:28<1:13:43,  2.32s/it]

Episode 6091/8000, real env return = 287.20


 76%|██████████████████████████▋        | 6101/8000 [6:22:49<1:11:36,  2.26s/it]

Episode 6101/8000, real env return = 285.94


 76%|██████████████████████████▋        | 6111/8000 [6:23:12<1:17:49,  2.47s/it]

Episode 6111/8000, real env return = 283.26


 77%|██████████████████████████▊        | 6121/8000 [6:23:33<1:11:18,  2.28s/it]

Episode 6121/8000, real env return = 286.91


 77%|██████████████████████████▊        | 6131/8000 [6:23:56<1:14:53,  2.40s/it]

Episode 6131/8000, real env return = 286.35


 77%|██████████████████████████▊        | 6141/8000 [6:24:21<1:20:02,  2.58s/it]

Episode 6141/8000, real env return = 289.30


 77%|██████████████████████████▉        | 6151/8000 [6:24:44<1:11:24,  2.32s/it]

Episode 6151/8000, real env return = 286.86


 77%|██████████████████████████▉        | 6161/8000 [6:25:07<1:07:58,  2.22s/it]

Episode 6161/8000, real env return = 286.15


 77%|██████████████████████████▉        | 6171/8000 [6:25:30<1:09:34,  2.28s/it]

Episode 6171/8000, real env return = 289.47


 77%|███████████████████████████        | 6181/8000 [6:25:51<1:07:24,  2.22s/it]

Episode 6181/8000, real env return = 284.64


 77%|███████████████████████████        | 6191/8000 [6:26:14<1:08:28,  2.27s/it]

Episode 6191/8000, real env return = 281.00


 78%|████████████████████████████▋        | 6201/8000 [6:26:35<59:10,  1.97s/it]

Episode 6201/8000, real env return = 289.63


 78%|███████████████████████████▏       | 6211/8000 [6:26:58<1:09:32,  2.33s/it]

Episode 6211/8000, real env return = 290.40


 78%|███████████████████████████▏       | 6221/8000 [6:27:21<1:10:42,  2.38s/it]

Episode 6221/8000, real env return = 286.11


 78%|███████████████████████████▎       | 6231/8000 [6:27:44<1:06:03,  2.24s/it]

Episode 6231/8000, real env return = 288.54


 78%|███████████████████████████▎       | 6241/8000 [6:28:06<1:02:49,  2.14s/it]

Episode 6241/8000, real env return = 286.77


 78%|███████████████████████████▎       | 6251/8000 [6:28:30<1:08:04,  2.34s/it]

Episode 6251/8000, real env return = 292.85


 78%|███████████████████████████▍       | 6261/8000 [6:28:53<1:12:34,  2.50s/it]

Episode 6261/8000, real env return = 285.74


 78%|███████████████████████████▍       | 6271/8000 [6:29:17<1:10:01,  2.43s/it]

Episode 6271/8000, real env return = 282.41


 79%|███████████████████████████▍       | 6281/8000 [6:29:38<1:03:47,  2.23s/it]

Episode 6281/8000, real env return = 291.21


 79%|███████████████████████████▌       | 6291/8000 [6:30:02<1:04:08,  2.25s/it]

Episode 6291/8000, real env return = 289.69


 79%|███████████████████████████▌       | 6301/8000 [6:30:26<1:06:28,  2.35s/it]

Episode 6301/8000, real env return = 282.83


 79%|███████████████████████████▌       | 6311/8000 [6:30:49<1:06:40,  2.37s/it]

Episode 6311/8000, real env return = 289.25


 79%|███████████████████████████▋       | 6321/8000 [6:31:14<1:03:49,  2.28s/it]

Episode 6321/8000, real env return = 286.15


 79%|█████████████████████████████▎       | 6331/8000 [6:31:36<59:59,  2.16s/it]

Episode 6331/8000, real env return = 291.65


 79%|█████████████████████████████▎       | 6341/8000 [6:31:58<57:16,  2.07s/it]

Episode 6341/8000, real env return = 288.79


 79%|███████████████████████████▊       | 6351/8000 [6:32:21<1:01:23,  2.23s/it]

Episode 6351/8000, real env return = 289.23


 80%|███████████████████████████▊       | 6361/8000 [6:32:44<1:05:33,  2.40s/it]

Episode 6361/8000, real env return = 290.97


 80%|███████████████████████████▊       | 6371/8000 [6:33:06<1:04:07,  2.36s/it]

Episode 6371/8000, real env return = 288.44


 80%|█████████████████████████████▌       | 6381/8000 [6:33:27<57:41,  2.14s/it]

Episode 6381/8000, real env return = 291.36


 80%|███████████████████████████▉       | 6391/8000 [6:33:51<1:02:26,  2.33s/it]

Episode 6391/8000, real env return = 293.07


 80%|████████████████████████████       | 6401/8000 [6:34:15<1:06:14,  2.49s/it]

Episode 6401/8000, real env return = 291.48


 80%|████████████████████████████       | 6411/8000 [6:34:39<1:04:07,  2.42s/it]

Episode 6411/8000, real env return = -21.04


 80%|█████████████████████████████▋       | 6421/8000 [6:35:01<57:21,  2.18s/it]

Episode 6421/8000, real env return = 290.12


 80%|████████████████████████████▏      | 6431/8000 [6:35:26<1:06:48,  2.55s/it]

Episode 6431/8000, real env return = 281.56


 81%|█████████████████████████████▊       | 6441/8000 [6:35:49<59:17,  2.28s/it]

Episode 6441/8000, real env return = 294.34


 81%|████████████████████████████▏      | 6451/8000 [6:36:12<1:01:15,  2.37s/it]

Episode 6451/8000, real env return = 289.19


 81%|████████████████████████████▎      | 6461/8000 [6:36:35<1:03:32,  2.48s/it]

Episode 6461/8000, real env return = 283.52


 81%|█████████████████████████████▉       | 6471/8000 [6:36:57<57:35,  2.26s/it]

Episode 6471/8000, real env return = 291.06


 81%|█████████████████████████████▉       | 6481/8000 [6:37:20<59:27,  2.35s/it]

Episode 6481/8000, real env return = 291.28


 81%|██████████████████████████████       | 6491/8000 [6:37:43<52:45,  2.10s/it]

Episode 6491/8000, real env return = 291.05


 81%|██████████████████████████████       | 6501/8000 [6:38:04<53:30,  2.14s/it]

Episode 6501/8000, real env return = 288.79


 81%|██████████████████████████████       | 6511/8000 [6:38:25<55:42,  2.24s/it]

Episode 6511/8000, real env return = 281.87


 82%|██████████████████████████████▏      | 6521/8000 [6:38:48<54:19,  2.20s/it]

Episode 6521/8000, real env return = 289.29


 82%|██████████████████████████████▏      | 6531/8000 [6:39:09<53:23,  2.18s/it]

Episode 6531/8000, real env return = 292.40


 82%|██████████████████████████████▎      | 6541/8000 [6:39:32<56:05,  2.31s/it]

Episode 6541/8000, real env return = 287.33


 82%|██████████████████████████████▎      | 6551/8000 [6:39:55<53:28,  2.21s/it]

Episode 6551/8000, real env return = 289.96


 82%|██████████████████████████████▎      | 6561/8000 [6:40:19<59:41,  2.49s/it]

Episode 6561/8000, real env return = 293.81


 82%|██████████████████████████████▍      | 6571/8000 [6:40:41<49:03,  2.06s/it]

Episode 6571/8000, real env return = 288.58


 82%|██████████████████████████████▍      | 6581/8000 [6:41:04<52:57,  2.24s/it]

Episode 6581/8000, real env return = 290.05


 82%|██████████████████████████████▍      | 6591/8000 [6:41:27<48:55,  2.08s/it]

Episode 6591/8000, real env return = 293.21


 83%|██████████████████████████████▌      | 6601/8000 [6:41:49<55:17,  2.37s/it]

Episode 6601/8000, real env return = 284.48


 83%|██████████████████████████████▌      | 6611/8000 [6:42:11<50:12,  2.17s/it]

Episode 6611/8000, real env return = 288.00


 83%|██████████████████████████████▌      | 6621/8000 [6:42:31<45:38,  1.99s/it]

Episode 6621/8000, real env return = 283.87


 83%|██████████████████████████████▋      | 6631/8000 [6:42:53<48:06,  2.11s/it]

Episode 6631/8000, real env return = 289.08


 83%|██████████████████████████████▋      | 6641/8000 [6:43:15<48:18,  2.13s/it]

Episode 6641/8000, real env return = 292.47


 83%|██████████████████████████████▊      | 6651/8000 [6:43:39<53:16,  2.37s/it]

Episode 6651/8000, real env return = 284.53


 83%|██████████████████████████████▊      | 6661/8000 [6:44:02<53:24,  2.39s/it]

Episode 6661/8000, real env return = 290.57


 83%|██████████████████████████████▊      | 6671/8000 [6:44:24<51:33,  2.33s/it]

Episode 6671/8000, real env return = 286.86


 84%|██████████████████████████████▉      | 6681/8000 [6:44:46<44:32,  2.03s/it]

Episode 6681/8000, real env return = 287.79


 84%|██████████████████████████████▉      | 6691/8000 [6:45:10<53:26,  2.45s/it]

Episode 6691/8000, real env return = 286.62


 84%|██████████████████████████████▉      | 6701/8000 [6:45:33<50:05,  2.31s/it]

Episode 6701/8000, real env return = 285.59


 84%|███████████████████████████████      | 6711/8000 [6:45:55<44:54,  2.09s/it]

Episode 6711/8000, real env return = 288.29


 84%|███████████████████████████████      | 6721/8000 [6:46:17<48:37,  2.28s/it]

Episode 6721/8000, real env return = 289.42


 84%|███████████████████████████████▏     | 6731/8000 [6:46:39<49:29,  2.34s/it]

Episode 6731/8000, real env return = 282.84


 84%|███████████████████████████████▏     | 6741/8000 [6:47:00<43:55,  2.09s/it]

Episode 6741/8000, real env return = 287.11


 84%|███████████████████████████████▏     | 6751/8000 [6:47:23<46:35,  2.24s/it]

Episode 6751/8000, real env return = 288.74


 85%|███████████████████████████████▎     | 6761/8000 [6:47:46<45:55,  2.22s/it]

Episode 6761/8000, real env return = 289.65


 85%|███████████████████████████████▎     | 6771/8000 [6:48:09<48:16,  2.36s/it]

Episode 6771/8000, real env return = 283.71


 85%|███████████████████████████████▎     | 6781/8000 [6:48:34<50:52,  2.50s/it]

Episode 6781/8000, real env return = 289.74


 85%|███████████████████████████████▍     | 6791/8000 [6:48:57<45:08,  2.24s/it]

Episode 6791/8000, real env return = 292.24


 85%|███████████████████████████████▍     | 6801/8000 [6:49:18<40:05,  2.01s/it]

Episode 6801/8000, real env return = 284.00


 85%|███████████████████████████████▌     | 6811/8000 [6:49:41<42:44,  2.16s/it]

Episode 6811/8000, real env return = 285.20


 85%|███████████████████████████████▌     | 6821/8000 [6:50:04<46:29,  2.37s/it]

Episode 6821/8000, real env return = 284.21


 85%|███████████████████████████████▌     | 6831/8000 [6:50:27<45:48,  2.35s/it]

Episode 6831/8000, real env return = 282.07


 86%|███████████████████████████████▋     | 6841/8000 [6:50:49<46:16,  2.40s/it]

Episode 6841/8000, real env return = 280.26


 86%|███████████████████████████████▋     | 6851/8000 [6:51:13<44:01,  2.30s/it]

Episode 6851/8000, real env return = 284.79


 86%|███████████████████████████████▋     | 6861/8000 [6:51:35<42:20,  2.23s/it]

Episode 6861/8000, real env return = 289.34


 86%|███████████████████████████████▊     | 6871/8000 [6:51:56<40:08,  2.13s/it]

Episode 6871/8000, real env return = 283.91


 86%|███████████████████████████████▊     | 6881/8000 [6:52:17<40:06,  2.15s/it]

Episode 6881/8000, real env return = 280.83


 86%|███████████████████████████████▊     | 6891/8000 [6:52:40<40:55,  2.21s/it]

Episode 6891/8000, real env return = 285.79


 86%|███████████████████████████████▉     | 6901/8000 [6:53:02<38:12,  2.09s/it]

Episode 6901/8000, real env return = 284.17


 86%|███████████████████████████████▉     | 6911/8000 [6:53:24<41:15,  2.27s/it]

Episode 6911/8000, real env return = 278.07


 87%|████████████████████████████████     | 6921/8000 [6:53:47<36:43,  2.04s/it]

Episode 6921/8000, real env return = 287.30


 87%|████████████████████████████████     | 6931/8000 [6:54:09<37:34,  2.11s/it]

Episode 6931/8000, real env return = 286.75


 87%|████████████████████████████████     | 6941/8000 [6:54:31<38:52,  2.20s/it]

Episode 6941/8000, real env return = 283.26


 87%|████████████████████████████████▏    | 6951/8000 [6:54:54<38:31,  2.20s/it]

Episode 6951/8000, real env return = 284.43


 87%|████████████████████████████████▏    | 6961/8000 [6:55:17<39:24,  2.28s/it]

Episode 6961/8000, real env return = 282.82


 87%|████████████████████████████████▏    | 6971/8000 [6:55:38<37:01,  2.16s/it]

Episode 6971/8000, real env return = 284.04


 87%|████████████████████████████████▎    | 6981/8000 [6:56:03<41:58,  2.47s/it]

Episode 6981/8000, real env return = 281.05


 87%|████████████████████████████████▎    | 6991/8000 [6:56:23<31:56,  1.90s/it]

Episode 6991/8000, real env return = 288.39


 88%|████████████████████████████████▍    | 7001/8000 [6:56:46<36:02,  2.17s/it]

Episode 7001/8000, real env return = 283.94


 88%|████████████████████████████████▍    | 7011/8000 [6:57:09<38:43,  2.35s/it]

Episode 7011/8000, real env return = 289.87


 88%|████████████████████████████████▍    | 7021/8000 [6:57:32<39:34,  2.43s/it]

Episode 7021/8000, real env return = 286.15


 88%|████████████████████████████████▌    | 7031/8000 [6:57:55<34:55,  2.16s/it]

Episode 7031/8000, real env return = 287.44


 88%|████████████████████████████████▌    | 7041/8000 [6:58:19<38:35,  2.41s/it]

Episode 7041/8000, real env return = 285.81


 88%|████████████████████████████████▌    | 7051/8000 [6:58:42<35:45,  2.26s/it]

Episode 7051/8000, real env return = 281.89


 88%|████████████████████████████████▋    | 7061/8000 [6:59:05<36:21,  2.32s/it]

Episode 7061/8000, real env return = 287.41


 88%|████████████████████████████████▋    | 7071/8000 [6:59:27<32:17,  2.09s/it]

Episode 7071/8000, real env return = 287.72


 89%|████████████████████████████████▋    | 7081/8000 [6:59:49<32:40,  2.13s/it]

Episode 7081/8000, real env return = 288.47


 89%|████████████████████████████████▊    | 7091/8000 [7:00:11<34:22,  2.27s/it]

Episode 7091/8000, real env return = 283.03


 89%|████████████████████████████████▊    | 7101/8000 [7:00:33<32:51,  2.19s/it]

Episode 7101/8000, real env return = 285.96


 89%|████████████████████████████████▉    | 7111/8000 [7:00:55<33:56,  2.29s/it]

Episode 7111/8000, real env return = 285.48


 89%|████████████████████████████████▉    | 7121/8000 [7:01:19<34:50,  2.38s/it]

Episode 7121/8000, real env return = 281.45


 89%|████████████████████████████████▉    | 7131/8000 [7:01:42<32:49,  2.27s/it]

Episode 7131/8000, real env return = 285.29


 89%|█████████████████████████████████    | 7141/8000 [7:02:06<34:31,  2.41s/it]

Episode 7141/8000, real env return = 278.99


 89%|█████████████████████████████████    | 7151/8000 [7:02:28<31:07,  2.20s/it]

Episode 7151/8000, real env return = 285.99


 90%|█████████████████████████████████    | 7161/8000 [7:02:51<31:03,  2.22s/it]

Episode 7161/8000, real env return = 278.12


 90%|█████████████████████████████████▏   | 7171/8000 [7:03:15<34:30,  2.50s/it]

Episode 7171/8000, real env return = 285.97


 90%|█████████████████████████████████▏   | 7181/8000 [7:03:39<31:22,  2.30s/it]

Episode 7181/8000, real env return = 288.65


 90%|█████████████████████████████████▎   | 7191/8000 [7:04:02<30:46,  2.28s/it]

Episode 7191/8000, real env return = 288.74


 90%|█████████████████████████████████▎   | 7201/8000 [7:04:23<27:40,  2.08s/it]

Episode 7201/8000, real env return = 281.96


 90%|█████████████████████████████████▎   | 7211/8000 [7:04:45<27:19,  2.08s/it]

Episode 7211/8000, real env return = 285.69


 90%|█████████████████████████████████▍   | 7221/8000 [7:05:08<29:38,  2.28s/it]

Episode 7221/8000, real env return = 288.01


 90%|█████████████████████████████████▍   | 7231/8000 [7:05:30<28:30,  2.22s/it]

Episode 7231/8000, real env return = 282.43


 91%|█████████████████████████████████▍   | 7241/8000 [7:05:52<28:12,  2.23s/it]

Episode 7241/8000, real env return = 283.18


 91%|█████████████████████████████████▌   | 7251/8000 [7:06:16<28:34,  2.29s/it]

Episode 7251/8000, real env return = 285.22


 91%|█████████████████████████████████▌   | 7261/8000 [7:06:39<25:02,  2.03s/it]

Episode 7261/8000, real env return = 286.23


 91%|█████████████████████████████████▋   | 7271/8000 [7:07:03<27:26,  2.26s/it]

Episode 7271/8000, real env return = 284.38


 91%|█████████████████████████████████▋   | 7281/8000 [7:07:26<28:23,  2.37s/it]

Episode 7281/8000, real env return = 291.03


 91%|█████████████████████████████████▋   | 7291/8000 [7:07:48<24:29,  2.07s/it]

Episode 7291/8000, real env return = 282.87


 91%|█████████████████████████████████▊   | 7301/8000 [7:08:11<26:13,  2.25s/it]

Episode 7301/8000, real env return = 287.90


 91%|█████████████████████████████████▊   | 7311/8000 [7:08:35<28:06,  2.45s/it]

Episode 7311/8000, real env return = 285.96


 92%|█████████████████████████████████▊   | 7321/8000 [7:08:57<24:21,  2.15s/it]

Episode 7321/8000, real env return = 287.45


 92%|█████████████████████████████████▉   | 7331/8000 [7:09:21<26:52,  2.41s/it]

Episode 7331/8000, real env return = 288.01


 92%|█████████████████████████████████▉   | 7341/8000 [7:09:44<27:31,  2.51s/it]

Episode 7341/8000, real env return = 287.38


 92%|█████████████████████████████████▉   | 7351/8000 [7:10:05<24:41,  2.28s/it]

Episode 7351/8000, real env return = 287.01


 92%|██████████████████████████████████   | 7361/8000 [7:10:28<25:30,  2.40s/it]

Episode 7361/8000, real env return = 283.86


 92%|██████████████████████████████████   | 7371/8000 [7:10:51<22:41,  2.16s/it]

Episode 7371/8000, real env return = 285.22


 92%|██████████████████████████████████▏  | 7381/8000 [7:11:15<24:49,  2.41s/it]

Episode 7381/8000, real env return = 281.16


 92%|██████████████████████████████████▏  | 7391/8000 [7:11:37<20:41,  2.04s/it]

Episode 7391/8000, real env return = 290.67


 93%|██████████████████████████████████▏  | 7401/8000 [7:12:00<22:14,  2.23s/it]

Episode 7401/8000, real env return = 289.30


 93%|██████████████████████████████████▎  | 7411/8000 [7:12:19<18:09,  1.85s/it]

Episode 7411/8000, real env return = 283.19


 93%|██████████████████████████████████▎  | 7421/8000 [7:12:42<22:05,  2.29s/it]

Episode 7421/8000, real env return = 286.56


 93%|██████████████████████████████████▎  | 7431/8000 [7:13:05<21:50,  2.30s/it]

Episode 7431/8000, real env return = 287.03


 93%|██████████████████████████████████▍  | 7441/8000 [7:13:28<20:34,  2.21s/it]

Episode 7441/8000, real env return = 284.61


 93%|██████████████████████████████████▍  | 7451/8000 [7:13:50<20:56,  2.29s/it]

Episode 7451/8000, real env return = 282.39


 93%|██████████████████████████████████▌  | 7461/8000 [7:14:13<19:57,  2.22s/it]

Episode 7461/8000, real env return = 286.22


 93%|██████████████████████████████████▌  | 7471/8000 [7:14:35<19:52,  2.26s/it]

Episode 7471/8000, real env return = 287.95


 94%|██████████████████████████████████▌  | 7481/8000 [7:14:58<19:53,  2.30s/it]

Episode 7481/8000, real env return = 285.90


 94%|██████████████████████████████████▋  | 7491/8000 [7:15:20<20:58,  2.47s/it]

Episode 7491/8000, real env return = 281.08


 94%|██████████████████████████████████▋  | 7501/8000 [7:15:42<18:44,  2.25s/it]

Episode 7501/8000, real env return = 282.48


 94%|██████████████████████████████████▋  | 7511/8000 [7:16:03<17:02,  2.09s/it]

Episode 7511/8000, real env return = 286.35


 94%|██████████████████████████████████▊  | 7521/8000 [7:16:25<16:46,  2.10s/it]

Episode 7521/8000, real env return = 282.15


 94%|██████████████████████████████████▊  | 7531/8000 [7:16:47<15:18,  1.96s/it]

Episode 7531/8000, real env return = 283.54


 94%|██████████████████████████████████▉  | 7541/8000 [7:17:08<15:11,  1.99s/it]

Episode 7541/8000, real env return = 284.38


 94%|██████████████████████████████████▉  | 7551/8000 [7:17:30<17:13,  2.30s/it]

Episode 7551/8000, real env return = 288.57


 95%|██████████████████████████████████▉  | 7561/8000 [7:17:53<16:42,  2.28s/it]

Episode 7561/8000, real env return = 281.33


 95%|███████████████████████████████████  | 7571/8000 [7:18:16<15:38,  2.19s/it]

Episode 7571/8000, real env return = 288.22


 95%|███████████████████████████████████  | 7581/8000 [7:18:40<16:53,  2.42s/it]

Episode 7581/8000, real env return = 283.84


 95%|███████████████████████████████████  | 7591/8000 [7:19:03<15:39,  2.30s/it]

Episode 7591/8000, real env return = 278.25


 95%|███████████████████████████████████▏ | 7601/8000 [7:19:25<15:17,  2.30s/it]

Episode 7601/8000, real env return = 283.82


 95%|███████████████████████████████████▏ | 7611/8000 [7:19:47<14:11,  2.19s/it]

Episode 7611/8000, real env return = 281.79


 95%|███████████████████████████████████▏ | 7621/8000 [7:20:08<13:19,  2.11s/it]

Episode 7621/8000, real env return = 280.59


 95%|███████████████████████████████████▎ | 7631/8000 [7:20:32<14:22,  2.34s/it]

Episode 7631/8000, real env return = 278.24


 96%|███████████████████████████████████▎ | 7641/8000 [7:20:54<12:54,  2.16s/it]

Episode 7641/8000, real env return = 283.94


 96%|███████████████████████████████████▍ | 7651/8000 [7:21:15<12:24,  2.13s/it]

Episode 7651/8000, real env return = 281.81


 96%|███████████████████████████████████▍ | 7661/8000 [7:21:38<13:04,  2.31s/it]

Episode 7661/8000, real env return = 283.11


 96%|███████████████████████████████████▍ | 7671/8000 [7:22:02<13:14,  2.41s/it]

Episode 7671/8000, real env return = 283.74


 96%|███████████████████████████████████▌ | 7681/8000 [7:22:25<12:24,  2.33s/it]

Episode 7681/8000, real env return = 287.02


 96%|███████████████████████████████████▌ | 7691/8000 [7:22:49<11:53,  2.31s/it]

Episode 7691/8000, real env return = 282.23


 96%|███████████████████████████████████▌ | 7701/8000 [7:23:12<11:20,  2.28s/it]

Episode 7701/8000, real env return = 285.37


 96%|███████████████████████████████████▋ | 7711/8000 [7:23:36<11:15,  2.34s/it]

Episode 7711/8000, real env return = 282.78


 97%|███████████████████████████████████▋ | 7721/8000 [7:23:57<09:15,  1.99s/it]

Episode 7721/8000, real env return = 285.49


 97%|███████████████████████████████████▊ | 7731/8000 [7:24:20<10:23,  2.32s/it]

Episode 7731/8000, real env return = 282.65


 97%|███████████████████████████████████▊ | 7741/8000 [7:24:44<09:47,  2.27s/it]

Episode 7741/8000, real env return = 281.43


 97%|███████████████████████████████████▊ | 7751/8000 [7:25:07<09:34,  2.31s/it]

Episode 7751/8000, real env return = 284.18


 97%|███████████████████████████████████▉ | 7761/8000 [7:25:31<09:26,  2.37s/it]

Episode 7761/8000, real env return = 280.43


 97%|███████████████████████████████████▉ | 7771/8000 [7:25:53<08:24,  2.20s/it]

Episode 7771/8000, real env return = 283.42


 97%|███████████████████████████████████▉ | 7781/8000 [7:26:15<08:42,  2.38s/it]

Episode 7781/8000, real env return = 281.69


 97%|████████████████████████████████████ | 7791/8000 [7:26:38<07:53,  2.26s/it]

Episode 7791/8000, real env return = 285.24


 98%|████████████████████████████████████ | 7801/8000 [7:27:01<07:46,  2.34s/it]

Episode 7801/8000, real env return = 290.37


 98%|████████████████████████████████████▏| 7811/8000 [7:27:24<07:14,  2.30s/it]

Episode 7811/8000, real env return = 289.22


 98%|████████████████████████████████████▏| 7821/8000 [7:27:45<05:55,  1.99s/it]

Episode 7821/8000, real env return = 285.74


 98%|████████████████████████████████████▏| 7831/8000 [7:28:08<06:24,  2.28s/it]

Episode 7831/8000, real env return = 284.33


 98%|████████████████████████████████████▎| 7841/8000 [7:28:30<05:56,  2.24s/it]

Episode 7841/8000, real env return = 292.70


 98%|████████████████████████████████████▎| 7851/8000 [7:28:51<05:17,  2.13s/it]

Episode 7851/8000, real env return = 285.09


 98%|████████████████████████████████████▎| 7861/8000 [7:29:12<05:22,  2.32s/it]

Episode 7861/8000, real env return = 289.99


 98%|████████████████████████████████████▍| 7871/8000 [7:29:34<05:02,  2.34s/it]

Episode 7871/8000, real env return = 284.69


 99%|████████████████████████████████████▍| 7881/8000 [7:29:57<04:28,  2.26s/it]

Episode 7881/8000, real env return = 287.51


 99%|████████████████████████████████████▍| 7891/8000 [7:30:18<03:34,  1.96s/it]

Episode 7891/8000, real env return = 289.86


 99%|████████████████████████████████████▌| 7901/8000 [7:30:41<03:40,  2.23s/it]

Episode 7901/8000, real env return = 284.71


 99%|████████████████████████████████████▌| 7911/8000 [7:31:02<03:00,  2.03s/it]

Episode 7911/8000, real env return = 287.29


 99%|████████████████████████████████████▋| 7921/8000 [7:31:25<02:52,  2.19s/it]

Episode 7921/8000, real env return = 282.22


 99%|████████████████████████████████████▋| 7931/8000 [7:31:48<02:41,  2.33s/it]

Episode 7931/8000, real env return = 283.23


 99%|████████████████████████████████████▋| 7941/8000 [7:32:09<02:18,  2.35s/it]

Episode 7941/8000, real env return = 280.38


 99%|████████████████████████████████████▊| 7951/8000 [7:32:31<01:42,  2.08s/it]

Episode 7951/8000, real env return = 281.52


100%|████████████████████████████████████▊| 7961/8000 [7:32:53<01:29,  2.30s/it]

Episode 7961/8000, real env return = 287.16


100%|████████████████████████████████████▊| 7971/8000 [7:33:15<01:05,  2.26s/it]

Episode 7971/8000, real env return = 286.67


100%|████████████████████████████████████▉| 7981/8000 [7:33:37<00:42,  2.22s/it]

Episode 7981/8000, real env return = 283.65


100%|████████████████████████████████████▉| 7991/8000 [7:34:00<00:21,  2.40s/it]

Episode 7991/8000, real env return = 284.69


100%|█████████████████████████████████████| 8000/8000 [7:34:18<00:00,  3.41s/it]


Training finished.


In [6]:
train3 = main()

Using cuda device


  0%|                                                  | 0/8000 [00:00<?, ?it/s]

Episode 1/8000, real env return = -103.57


  0%|                                       | 11/8000 [00:14<4:01:20,  1.81s/it]

Episode 11/8000, real env return = -82.25


  0%|                                       | 21/8000 [00:32<3:52:19,  1.75s/it]

Episode 21/8000, real env return = -90.82


  0%|▏                                      | 31/8000 [00:50<4:09:30,  1.88s/it]

Episode 31/8000, real env return = -78.70


  1%|▏                                      | 41/8000 [01:11<4:52:55,  2.21s/it]

Episode 41/8000, real env return = -66.21


  1%|▏                                      | 51/8000 [01:34<5:54:29,  2.68s/it]

Episode 51/8000, real env return = -68.48


  1%|▎                                      | 61/8000 [01:53<4:43:04,  2.14s/it]

Episode 61/8000, real env return = -54.07


  1%|▎                                      | 71/8000 [02:16<4:59:10,  2.26s/it]

Episode 71/8000, real env return = -84.65


  1%|▍                                      | 81/8000 [02:39<4:50:19,  2.20s/it]

Episode 81/8000, real env return = -64.03


  1%|▍                                      | 91/8000 [03:05<5:48:29,  2.64s/it]

Episode 91/8000, real env return = -61.72


  1%|▍                                     | 101/8000 [03:32<5:28:23,  2.49s/it]

Episode 101/8000, real env return = -124.25


  1%|▌                                     | 111/8000 [03:57<5:36:29,  2.56s/it]

Episode 111/8000, real env return = -48.38


  2%|▌                                     | 121/8000 [04:23<5:21:39,  2.45s/it]

Episode 121/8000, real env return = -41.27


  2%|▌                                     | 131/8000 [04:49<5:20:58,  2.45s/it]

Episode 131/8000, real env return = -24.59


  2%|▋                                     | 141/8000 [05:15<5:29:42,  2.52s/it]

Episode 141/8000, real env return = -37.53


  2%|▋                                     | 151/8000 [05:40<5:54:50,  2.71s/it]

Episode 151/8000, real env return = -115.95


  2%|▊                                     | 161/8000 [06:07<5:47:31,  2.66s/it]

Episode 161/8000, real env return = -125.11


  2%|▊                                     | 171/8000 [06:32<6:00:30,  2.76s/it]

Episode 171/8000, real env return = -76.69


  2%|▊                                     | 181/8000 [07:00<6:29:35,  2.99s/it]

Episode 181/8000, real env return = -65.07


  2%|▉                                     | 191/8000 [07:27<6:16:10,  2.89s/it]

Episode 191/8000, real env return = -69.67


  3%|▉                                     | 201/8000 [07:54<6:09:15,  2.84s/it]

Episode 201/8000, real env return = -18.98


  3%|█                                     | 211/8000 [08:22<5:34:57,  2.58s/it]

Episode 211/8000, real env return = -113.93


  3%|█                                     | 221/8000 [08:49<5:39:42,  2.62s/it]

Episode 221/8000, real env return = -104.20


  3%|█                                     | 231/8000 [09:18<6:09:18,  2.85s/it]

Episode 231/8000, real env return = -107.99


  3%|█▏                                    | 241/8000 [09:47<6:30:20,  3.02s/it]

Episode 241/8000, real env return = -102.24


  3%|█▏                                    | 251/8000 [10:17<6:15:47,  2.91s/it]

Episode 251/8000, real env return = -106.50


  3%|█▏                                    | 261/8000 [10:47<6:27:26,  3.00s/it]

Episode 261/8000, real env return = -106.62


  3%|█▎                                    | 271/8000 [11:16<5:48:23,  2.70s/it]

Episode 271/8000, real env return = -94.18


  4%|█▎                                    | 281/8000 [11:46<6:31:18,  3.04s/it]

Episode 281/8000, real env return = -93.09


  4%|█▍                                    | 291/8000 [12:17<6:31:06,  3.04s/it]

Episode 291/8000, real env return = -90.19


  4%|█▍                                    | 301/8000 [12:45<5:53:29,  2.75s/it]

Episode 301/8000, real env return = -101.85


  4%|█▍                                    | 311/8000 [13:13<6:03:34,  2.84s/it]

Episode 311/8000, real env return = -98.64


  4%|█▌                                    | 321/8000 [13:43<5:51:19,  2.75s/it]

Episode 321/8000, real env return = -103.81


  4%|█▌                                    | 331/8000 [14:12<6:14:55,  2.93s/it]

Episode 331/8000, real env return = -97.42


  4%|█▌                                    | 341/8000 [14:41<6:12:02,  2.91s/it]

Episode 341/8000, real env return = -94.02


  4%|█▋                                    | 351/8000 [15:11<6:32:29,  3.08s/it]

Episode 351/8000, real env return = -103.33


  5%|█▋                                    | 361/8000 [15:42<6:42:43,  3.16s/it]

Episode 361/8000, real env return = -79.48


  5%|█▊                                    | 371/8000 [16:10<5:51:17,  2.76s/it]

Episode 371/8000, real env return = -101.27


  5%|█▊                                    | 381/8000 [16:41<6:18:36,  2.98s/it]

Episode 381/8000, real env return = -95.69


  5%|█▊                                    | 391/8000 [17:11<6:15:59,  2.96s/it]

Episode 391/8000, real env return = -85.64


  5%|█▉                                    | 401/8000 [17:41<6:07:34,  2.90s/it]

Episode 401/8000, real env return = -71.71


  5%|█▉                                    | 411/8000 [18:12<6:26:08,  3.05s/it]

Episode 411/8000, real env return = -74.31


  5%|█▉                                    | 421/8000 [18:42<6:29:10,  3.08s/it]

Episode 421/8000, real env return = -78.71


  5%|██                                    | 431/8000 [19:10<6:08:59,  2.93s/it]

Episode 431/8000, real env return = -103.37


  6%|██                                    | 441/8000 [19:42<6:48:04,  3.24s/it]

Episode 441/8000, real env return = -103.39


  6%|██▏                                   | 451/8000 [20:12<6:21:54,  3.04s/it]

Episode 451/8000, real env return = -99.12


  6%|██▏                                   | 461/8000 [20:40<5:53:23,  2.81s/it]

Episode 461/8000, real env return = -83.09


  6%|██▏                                   | 471/8000 [21:11<6:34:18,  3.14s/it]

Episode 471/8000, real env return = -73.41


  6%|██▎                                   | 481/8000 [21:41<6:21:00,  3.04s/it]

Episode 481/8000, real env return = -77.10


  6%|██▎                                   | 491/8000 [22:09<5:18:28,  2.54s/it]

Episode 491/8000, real env return = -101.21


  6%|██▍                                   | 501/8000 [22:34<5:20:48,  2.57s/it]

Episode 501/8000, real env return = -123.80


  6%|██▍                                   | 511/8000 [22:58<4:32:56,  2.19s/it]

Episode 511/8000, real env return = -124.42


  7%|██▍                                   | 521/8000 [23:20<4:51:03,  2.33s/it]

Episode 521/8000, real env return = -103.93


  7%|██▌                                   | 531/8000 [23:51<6:14:59,  3.01s/it]

Episode 531/8000, real env return = -60.39


  7%|██▌                                   | 541/8000 [24:22<6:26:41,  3.11s/it]

Episode 541/8000, real env return = -69.14


  7%|██▌                                   | 551/8000 [24:49<5:37:20,  2.72s/it]

Episode 551/8000, real env return = -58.54


  7%|██▋                                   | 561/8000 [25:20<6:12:03,  3.00s/it]

Episode 561/8000, real env return = -89.09


  7%|██▋                                   | 571/8000 [25:50<6:04:50,  2.95s/it]

Episode 571/8000, real env return = -111.03


  7%|██▊                                   | 581/8000 [26:16<5:39:53,  2.75s/it]

Episode 581/8000, real env return = -62.98


  7%|██▊                                   | 591/8000 [26:46<5:54:47,  2.87s/it]

Episode 591/8000, real env return = -50.60


  8%|██▊                                   | 601/8000 [27:15<5:26:38,  2.65s/it]

Episode 601/8000, real env return = -101.64


  8%|██▉                                   | 611/8000 [27:38<4:41:51,  2.29s/it]

Episode 611/8000, real env return = -102.91


  8%|██▉                                   | 621/8000 [28:03<5:12:06,  2.54s/it]

Episode 621/8000, real env return = -106.67


  8%|██▉                                   | 631/8000 [28:23<4:23:33,  2.15s/it]

Episode 631/8000, real env return = -103.67


  8%|███                                   | 641/8000 [28:45<4:17:12,  2.10s/it]

Episode 641/8000, real env return = -100.95


  8%|███                                   | 651/8000 [29:08<4:34:21,  2.24s/it]

Episode 651/8000, real env return = -104.79


  8%|███▏                                  | 661/8000 [29:30<4:33:49,  2.24s/it]

Episode 661/8000, real env return = -105.05


  8%|███▏                                  | 671/8000 [29:53<4:42:56,  2.32s/it]

Episode 671/8000, real env return = -103.88


  9%|███▏                                  | 681/8000 [30:15<4:28:06,  2.20s/it]

Episode 681/8000, real env return = -104.44


  9%|███▎                                  | 691/8000 [30:36<4:39:50,  2.30s/it]

Episode 691/8000, real env return = -103.11


  9%|███▎                                  | 701/8000 [30:56<4:21:52,  2.15s/it]

Episode 701/8000, real env return = -102.11


  9%|███▍                                  | 711/8000 [31:17<4:32:40,  2.24s/it]

Episode 711/8000, real env return = -102.67


  9%|███▍                                  | 721/8000 [31:37<3:59:45,  1.98s/it]

Episode 721/8000, real env return = -103.52


  9%|███▍                                  | 731/8000 [31:59<4:23:13,  2.17s/it]

Episode 731/8000, real env return = -101.82


  9%|███▌                                  | 741/8000 [32:17<3:22:48,  1.68s/it]

Episode 741/8000, real env return = -104.99


  9%|███▌                                  | 751/8000 [32:38<4:12:20,  2.09s/it]

Episode 751/8000, real env return = -102.01


 10%|███▌                                  | 761/8000 [32:54<3:47:47,  1.89s/it]

Episode 761/8000, real env return = -104.50


 10%|███▋                                  | 771/8000 [33:13<3:55:30,  1.95s/it]

Episode 771/8000, real env return = -102.93


 10%|███▋                                  | 781/8000 [33:30<3:43:44,  1.86s/it]

Episode 781/8000, real env return = -103.23


 10%|███▊                                  | 791/8000 [33:51<4:27:59,  2.23s/it]

Episode 791/8000, real env return = -101.33


 10%|███▊                                  | 801/8000 [34:10<3:53:36,  1.95s/it]

Episode 801/8000, real env return = -102.94


 10%|███▊                                  | 811/8000 [34:32<4:16:42,  2.14s/it]

Episode 811/8000, real env return = -103.73


 10%|███▉                                  | 821/8000 [34:48<3:38:25,  1.83s/it]

Episode 821/8000, real env return = -103.97


 10%|███▉                                  | 831/8000 [35:04<3:37:10,  1.82s/it]

Episode 831/8000, real env return = -103.58


 11%|███▉                                  | 841/8000 [35:22<3:30:41,  1.77s/it]

Episode 841/8000, real env return = -102.81


 11%|████                                  | 851/8000 [35:40<3:43:47,  1.88s/it]

Episode 851/8000, real env return = -102.03


 11%|████                                  | 861/8000 [35:56<3:07:17,  1.57s/it]

Episode 861/8000, real env return = -102.58


 11%|████▏                                 | 871/8000 [36:16<3:57:04,  2.00s/it]

Episode 871/8000, real env return = -102.14


 11%|████▏                                 | 881/8000 [36:36<3:36:54,  1.83s/it]

Episode 881/8000, real env return = -104.77


 11%|████▏                                 | 891/8000 [36:54<3:24:02,  1.72s/it]

Episode 891/8000, real env return = -104.27


 11%|████▎                                 | 901/8000 [37:11<3:14:39,  1.65s/it]

Episode 901/8000, real env return = -102.42


 11%|████▎                                 | 911/8000 [37:26<3:05:00,  1.57s/it]

Episode 911/8000, real env return = -102.97


 12%|████▎                                 | 921/8000 [37:43<3:33:37,  1.81s/it]

Episode 921/8000, real env return = -102.21


 12%|████▍                                 | 931/8000 [38:01<3:15:21,  1.66s/it]

Episode 931/8000, real env return = -105.20


 12%|████▍                                 | 941/8000 [38:19<3:54:21,  1.99s/it]

Episode 941/8000, real env return = -104.99


 12%|████▌                                 | 951/8000 [38:37<3:39:37,  1.87s/it]

Episode 951/8000, real env return = -101.77


 12%|████▌                                 | 961/8000 [38:53<2:53:20,  1.48s/it]

Episode 961/8000, real env return = -102.68


 12%|████▌                                 | 971/8000 [39:08<2:49:36,  1.45s/it]

Episode 971/8000, real env return = -103.25


 12%|████▋                                 | 981/8000 [39:28<3:53:12,  1.99s/it]

Episode 981/8000, real env return = -122.63


 12%|████▋                                 | 991/8000 [39:49<4:50:46,  2.49s/it]

Episode 991/8000, real env return = -52.65


 13%|████▋                                | 1001/8000 [40:09<3:35:15,  1.85s/it]

Episode 1001/8000, real env return = -104.85


 13%|████▋                                | 1011/8000 [40:26<3:09:03,  1.62s/it]

Episode 1011/8000, real env return = -103.37


 13%|████▋                                | 1021/8000 [40:41<3:12:59,  1.66s/it]

Episode 1021/8000, real env return = -105.00


 13%|████▊                                | 1031/8000 [40:57<3:29:42,  1.81s/it]

Episode 1031/8000, real env return = -104.44


 13%|████▊                                | 1041/8000 [41:15<3:34:46,  1.85s/it]

Episode 1041/8000, real env return = -39.84


 13%|████▊                                | 1051/8000 [41:37<3:46:54,  1.96s/it]

Episode 1051/8000, real env return = -35.30


 13%|████▉                                | 1061/8000 [41:58<4:04:47,  2.12s/it]

Episode 1061/8000, real env return = -39.96


 13%|████▉                                | 1071/8000 [42:20<4:17:31,  2.23s/it]

Episode 1071/8000, real env return = -38.48


 14%|████▉                                | 1081/8000 [42:43<4:38:42,  2.42s/it]

Episode 1081/8000, real env return = -45.10


 14%|█████                                | 1091/8000 [43:07<5:03:22,  2.63s/it]

Episode 1091/8000, real env return = -35.37


 14%|█████                                | 1101/8000 [43:31<4:09:47,  2.17s/it]

Episode 1101/8000, real env return = -43.03


 14%|█████▏                               | 1111/8000 [43:54<4:12:03,  2.20s/it]

Episode 1111/8000, real env return = -39.82


 14%|█████▏                               | 1121/8000 [44:18<4:15:37,  2.23s/it]

Episode 1121/8000, real env return = -40.66


 14%|█████▏                               | 1131/8000 [44:39<3:50:35,  2.01s/it]

Episode 1131/8000, real env return = -33.52


 14%|█████▎                               | 1141/8000 [45:03<4:29:35,  2.36s/it]

Episode 1141/8000, real env return = -36.70


 14%|█████▎                               | 1151/8000 [45:25<4:33:47,  2.40s/it]

Episode 1151/8000, real env return = -37.81


 15%|█████▎                               | 1161/8000 [45:50<4:54:13,  2.58s/it]

Episode 1161/8000, real env return = -31.88


 15%|█████▍                               | 1171/8000 [46:15<4:29:41,  2.37s/it]

Episode 1171/8000, real env return = -30.78


 15%|█████▍                               | 1181/8000 [46:39<4:30:31,  2.38s/it]

Episode 1181/8000, real env return = -56.72


 15%|█████▌                               | 1191/8000 [47:03<4:30:44,  2.39s/it]

Episode 1191/8000, real env return = -39.51


 15%|█████▌                               | 1201/8000 [47:29<4:51:08,  2.57s/it]

Episode 1201/8000, real env return = -48.16


 15%|█████▌                               | 1211/8000 [47:53<4:15:11,  2.26s/it]

Episode 1211/8000, real env return = -39.53


 15%|█████▋                               | 1221/8000 [48:18<4:37:51,  2.46s/it]

Episode 1221/8000, real env return = -33.94


 15%|█████▋                               | 1231/8000 [48:43<4:41:03,  2.49s/it]

Episode 1231/8000, real env return = -32.25


 16%|█████▋                               | 1241/8000 [49:08<4:49:09,  2.57s/it]

Episode 1241/8000, real env return = -35.85


 16%|█████▊                               | 1251/8000 [49:32<4:37:29,  2.47s/it]

Episode 1251/8000, real env return = -33.57


 16%|█████▊                               | 1261/8000 [50:00<5:15:55,  2.81s/it]

Episode 1261/8000, real env return = -49.94


 16%|█████▉                               | 1271/8000 [50:27<4:58:06,  2.66s/it]

Episode 1271/8000, real env return = -28.92


 16%|█████▉                               | 1281/8000 [50:52<4:26:13,  2.38s/it]

Episode 1281/8000, real env return = -33.23


 16%|█████▉                               | 1291/8000 [51:17<4:25:12,  2.37s/it]

Episode 1291/8000, real env return = -33.18


 16%|██████                               | 1301/8000 [51:44<4:50:46,  2.60s/it]

Episode 1301/8000, real env return = -30.09


 16%|██████                               | 1311/8000 [52:08<4:30:53,  2.43s/it]

Episode 1311/8000, real env return = -30.15


 17%|██████                               | 1321/8000 [52:34<4:34:09,  2.46s/it]

Episode 1321/8000, real env return = -33.63


 17%|██████▏                              | 1331/8000 [52:58<4:34:45,  2.47s/it]

Episode 1331/8000, real env return = -33.21


 17%|██████▏                              | 1341/8000 [53:22<4:11:01,  2.26s/it]

Episode 1341/8000, real env return = -35.66


 17%|██████▏                              | 1351/8000 [53:48<4:57:37,  2.69s/it]

Episode 1351/8000, real env return = -34.16


 17%|██████▎                              | 1361/8000 [54:15<5:06:54,  2.77s/it]

Episode 1361/8000, real env return = -40.52


 17%|██████▎                              | 1371/8000 [54:39<4:11:03,  2.27s/it]

Episode 1371/8000, real env return = -35.09


 17%|██████▍                              | 1381/8000 [55:05<4:42:58,  2.57s/it]

Episode 1381/8000, real env return = -42.76


 17%|██████▍                              | 1391/8000 [55:32<4:40:59,  2.55s/it]

Episode 1391/8000, real env return = -38.44


 18%|██████▍                              | 1401/8000 [55:57<4:10:53,  2.28s/it]

Episode 1401/8000, real env return = -43.27


 18%|██████▌                              | 1411/8000 [56:24<4:48:05,  2.62s/it]

Episode 1411/8000, real env return = -32.10


 18%|██████▌                              | 1421/8000 [56:49<4:26:58,  2.43s/it]

Episode 1421/8000, real env return = -40.82


 18%|██████▌                              | 1431/8000 [57:13<3:48:25,  2.09s/it]

Episode 1431/8000, real env return = -37.21


 18%|██████▋                              | 1441/8000 [57:38<4:54:05,  2.69s/it]

Episode 1441/8000, real env return = -34.34


 18%|██████▋                              | 1451/8000 [58:05<4:34:16,  2.51s/it]

Episode 1451/8000, real env return = -28.36


 18%|██████▊                              | 1461/8000 [58:32<4:59:54,  2.75s/it]

Episode 1461/8000, real env return = -24.54


 18%|██████▊                              | 1471/8000 [58:59<4:47:50,  2.65s/it]

Episode 1471/8000, real env return = -24.70


 19%|██████▊                              | 1481/8000 [59:24<4:22:12,  2.41s/it]

Episode 1481/8000, real env return = -23.65


 19%|██████▉                              | 1491/8000 [59:49<4:28:01,  2.47s/it]

Episode 1491/8000, real env return = -35.99


 19%|██████▌                            | 1501/8000 [1:00:16<5:05:18,  2.82s/it]

Episode 1501/8000, real env return = -30.78


 19%|██████▌                            | 1511/8000 [1:00:40<3:44:54,  2.08s/it]

Episode 1511/8000, real env return = -27.81


 19%|██████▋                            | 1521/8000 [1:01:06<4:46:25,  2.65s/it]

Episode 1521/8000, real env return = -30.85


 19%|██████▋                            | 1531/8000 [1:01:32<4:29:23,  2.50s/it]

Episode 1531/8000, real env return = -43.46


 19%|██████▋                            | 1541/8000 [1:01:58<4:50:25,  2.70s/it]

Episode 1541/8000, real env return = -26.17


 19%|██████▊                            | 1551/8000 [1:02:27<5:11:57,  2.90s/it]

Episode 1551/8000, real env return = -37.62


 20%|██████▊                            | 1561/8000 [1:02:51<4:49:02,  2.69s/it]

Episode 1561/8000, real env return = -32.86


 20%|██████▊                            | 1571/8000 [1:03:15<4:25:25,  2.48s/it]

Episode 1571/8000, real env return = -31.69


 20%|██████▉                            | 1581/8000 [1:03:41<4:29:44,  2.52s/it]

Episode 1581/8000, real env return = -36.78


 20%|██████▉                            | 1591/8000 [1:04:08<4:53:23,  2.75s/it]

Episode 1591/8000, real env return = -47.89


 20%|███████                            | 1601/8000 [1:04:34<4:57:46,  2.79s/it]

Episode 1601/8000, real env return = -26.96


 20%|███████                            | 1611/8000 [1:04:59<4:11:13,  2.36s/it]

Episode 1611/8000, real env return = -35.26


 20%|███████                            | 1621/8000 [1:05:26<4:31:48,  2.56s/it]

Episode 1621/8000, real env return = -35.08


 20%|███████▏                           | 1631/8000 [1:05:49<4:26:44,  2.51s/it]

Episode 1631/8000, real env return = -117.52


 21%|███████▏                           | 1641/8000 [1:06:16<4:20:52,  2.46s/it]

Episode 1641/8000, real env return = -39.78


 21%|███████▏                           | 1651/8000 [1:06:45<4:42:10,  2.67s/it]

Episode 1651/8000, real env return = -45.26


 21%|███████▎                           | 1661/8000 [1:07:12<4:54:42,  2.79s/it]

Episode 1661/8000, real env return = -32.94


 21%|███████▎                           | 1671/8000 [1:07:36<4:15:11,  2.42s/it]

Episode 1671/8000, real env return = -113.41


 21%|███████▎                           | 1681/8000 [1:07:58<3:50:11,  2.19s/it]

Episode 1681/8000, real env return = -115.69


 21%|███████▍                           | 1691/8000 [1:08:21<3:24:49,  1.95s/it]

Episode 1691/8000, real env return = -61.38


 21%|███████▍                           | 1701/8000 [1:08:44<3:44:05,  2.13s/it]

Episode 1701/8000, real env return = -117.52


 21%|███████▍                           | 1711/8000 [1:09:10<5:08:43,  2.95s/it]

Episode 1711/8000, real env return = -27.00


 22%|███████▌                           | 1721/8000 [1:09:33<4:21:34,  2.50s/it]

Episode 1721/8000, real env return = -160.64


 22%|███████▌                           | 1731/8000 [1:09:51<2:46:21,  1.59s/it]

Episode 1731/8000, real env return = -111.35


 22%|███████▌                           | 1741/8000 [1:10:15<4:17:44,  2.47s/it]

Episode 1741/8000, real env return = -118.13


 22%|███████▋                           | 1751/8000 [1:10:38<3:37:01,  2.08s/it]

Episode 1751/8000, real env return = -111.61


 22%|███████▋                           | 1761/8000 [1:10:55<2:41:13,  1.55s/it]

Episode 1761/8000, real env return = -115.38


 22%|███████▋                           | 1771/8000 [1:11:12<3:16:26,  1.89s/it]

Episode 1771/8000, real env return = -108.31


 22%|███████▊                           | 1781/8000 [1:11:31<3:07:02,  1.80s/it]

Episode 1781/8000, real env return = -107.70


 22%|███████▊                           | 1791/8000 [1:11:46<2:52:58,  1.67s/it]

Episode 1791/8000, real env return = -108.32


 23%|███████▉                           | 1801/8000 [1:12:06<3:19:12,  1.93s/it]

Episode 1801/8000, real env return = -118.74


 23%|███████▉                           | 1811/8000 [1:12:27<3:29:35,  2.03s/it]

Episode 1811/8000, real env return = -96.86


 23%|███████▉                           | 1821/8000 [1:12:45<3:21:36,  1.96s/it]

Episode 1821/8000, real env return = -107.75


 23%|████████                           | 1831/8000 [1:13:05<3:21:56,  1.96s/it]

Episode 1831/8000, real env return = -113.45


 23%|████████                           | 1841/8000 [1:13:22<2:50:34,  1.66s/it]

Episode 1841/8000, real env return = -116.57


 23%|████████                           | 1851/8000 [1:13:41<3:00:24,  1.76s/it]

Episode 1851/8000, real env return = -107.15


 23%|████████▏                          | 1861/8000 [1:13:57<2:50:00,  1.66s/it]

Episode 1861/8000, real env return = -107.23


 23%|████████▏                          | 1871/8000 [1:14:17<3:26:20,  2.02s/it]

Episode 1871/8000, real env return = -106.52


 24%|████████▏                          | 1881/8000 [1:14:33<2:37:38,  1.55s/it]

Episode 1881/8000, real env return = -106.97


 24%|████████▎                          | 1891/8000 [1:14:51<2:45:40,  1.63s/it]

Episode 1891/8000, real env return = -113.98


 24%|████████▎                          | 1901/8000 [1:15:08<2:59:04,  1.76s/it]

Episode 1901/8000, real env return = -111.30


 24%|████████▎                          | 1911/8000 [1:15:27<2:42:06,  1.60s/it]

Episode 1911/8000, real env return = -112.87


 24%|████████▍                          | 1921/8000 [1:15:42<2:50:34,  1.68s/it]

Episode 1921/8000, real env return = -113.78


 24%|████████▍                          | 1931/8000 [1:16:00<3:32:24,  2.10s/it]

Episode 1931/8000, real env return = -115.12


 24%|████████▍                          | 1941/8000 [1:16:17<2:39:22,  1.58s/it]

Episode 1941/8000, real env return = -110.73


 24%|████████▌                          | 1951/8000 [1:16:36<2:43:16,  1.62s/it]

Episode 1951/8000, real env return = -118.28


 25%|████████▌                          | 1961/8000 [1:16:54<3:21:17,  2.00s/it]

Episode 1961/8000, real env return = -107.23


 25%|████████▌                          | 1971/8000 [1:17:15<3:30:45,  2.10s/it]

Episode 1971/8000, real env return = -110.15


 25%|████████▋                          | 1981/8000 [1:17:34<3:23:21,  2.03s/it]

Episode 1981/8000, real env return = -107.14


 25%|████████▋                          | 1991/8000 [1:17:51<3:00:10,  1.80s/it]

Episode 1991/8000, real env return = -111.88


 25%|████████▊                          | 2001/8000 [1:18:08<3:00:29,  1.81s/it]

Episode 2001/8000, real env return = -114.02


 25%|████████▊                          | 2011/8000 [1:18:26<2:54:17,  1.75s/it]

Episode 2011/8000, real env return = -110.70


 25%|████████▊                          | 2021/8000 [1:18:44<2:56:24,  1.77s/it]

Episode 2021/8000, real env return = -106.83


 25%|████████▉                          | 2031/8000 [1:19:00<3:02:34,  1.84s/it]

Episode 2031/8000, real env return = -106.47


 26%|████████▉                          | 2041/8000 [1:19:18<2:47:20,  1.68s/it]

Episode 2041/8000, real env return = -108.45


 26%|████████▉                          | 2051/8000 [1:19:35<3:18:09,  2.00s/it]

Episode 2051/8000, real env return = -111.60


 26%|█████████                          | 2061/8000 [1:19:52<2:54:46,  1.77s/it]

Episode 2061/8000, real env return = -113.78


 26%|█████████                          | 2071/8000 [1:20:10<2:45:50,  1.68s/it]

Episode 2071/8000, real env return = -106.70


 26%|█████████                          | 2081/8000 [1:20:30<3:22:17,  2.05s/it]

Episode 2081/8000, real env return = -115.41


 26%|█████████▏                         | 2091/8000 [1:20:49<2:57:26,  1.80s/it]

Episode 2091/8000, real env return = -107.89


 26%|█████████▏                         | 2101/8000 [1:21:06<2:44:01,  1.67s/it]

Episode 2101/8000, real env return = -111.17


 26%|█████████▏                         | 2111/8000 [1:21:25<2:31:38,  1.54s/it]

Episode 2111/8000, real env return = -107.17


 27%|█████████▎                         | 2121/8000 [1:21:43<2:55:26,  1.79s/it]

Episode 2121/8000, real env return = -112.56


 27%|█████████▎                         | 2131/8000 [1:22:02<2:48:40,  1.72s/it]

Episode 2131/8000, real env return = -113.65


 27%|█████████▎                         | 2141/8000 [1:22:18<2:22:42,  1.46s/it]

Episode 2141/8000, real env return = -112.76


 27%|█████████▍                         | 2151/8000 [1:22:38<3:21:01,  2.06s/it]

Episode 2151/8000, real env return = -107.45


 27%|█████████▍                         | 2161/8000 [1:22:55<2:54:29,  1.79s/it]

Episode 2161/8000, real env return = -107.73


 27%|█████████▍                         | 2171/8000 [1:23:12<2:47:12,  1.72s/it]

Episode 2171/8000, real env return = -112.33


 27%|█████████▌                         | 2181/8000 [1:23:28<2:30:37,  1.55s/it]

Episode 2181/8000, real env return = -107.51


 27%|█████████▌                         | 2191/8000 [1:23:44<2:50:37,  1.76s/it]

Episode 2191/8000, real env return = -107.49


 28%|█████████▋                         | 2201/8000 [1:24:00<2:50:06,  1.76s/it]

Episode 2201/8000, real env return = -112.91


 28%|█████████▋                         | 2211/8000 [1:24:19<3:15:15,  2.02s/it]

Episode 2211/8000, real env return = -105.88


 28%|█████████▋                         | 2221/8000 [1:24:35<2:47:33,  1.74s/it]

Episode 2221/8000, real env return = -107.22


 28%|█████████▊                         | 2231/8000 [1:24:52<2:31:56,  1.58s/it]

Episode 2231/8000, real env return = -111.22


 28%|█████████▊                         | 2241/8000 [1:25:10<2:34:58,  1.61s/it]

Episode 2241/8000, real env return = -107.37


 28%|█████████▊                         | 2251/8000 [1:25:27<2:30:42,  1.57s/it]

Episode 2251/8000, real env return = -107.88


 28%|█████████▉                         | 2261/8000 [1:25:43<2:02:06,  1.28s/it]

Episode 2261/8000, real env return = -113.65


 28%|█████████▉                         | 2271/8000 [1:26:00<2:46:38,  1.75s/it]

Episode 2271/8000, real env return = -36.48


 29%|█████████▉                         | 2281/8000 [1:26:18<3:08:09,  1.97s/it]

Episode 2281/8000, real env return = -107.62


 29%|██████████                         | 2291/8000 [1:26:38<2:50:29,  1.79s/it]

Episode 2291/8000, real env return = -109.73


 29%|██████████                         | 2301/8000 [1:26:56<3:16:30,  2.07s/it]

Episode 2301/8000, real env return = -115.45


 29%|██████████                         | 2311/8000 [1:27:12<2:37:42,  1.66s/it]

Episode 2311/8000, real env return = -106.42


 29%|██████████▏                        | 2321/8000 [1:27:30<2:48:30,  1.78s/it]

Episode 2321/8000, real env return = -115.24


 29%|██████████▏                        | 2331/8000 [1:27:48<2:38:25,  1.68s/it]

Episode 2331/8000, real env return = -114.85


 29%|██████████▏                        | 2341/8000 [1:28:03<2:09:33,  1.37s/it]

Episode 2341/8000, real env return = -106.75


 29%|██████████▎                        | 2351/8000 [1:28:21<2:51:23,  1.82s/it]

Episode 2351/8000, real env return = -107.36


 30%|██████████▎                        | 2361/8000 [1:28:39<3:03:01,  1.95s/it]

Episode 2361/8000, real env return = -106.85


 30%|██████████▎                        | 2371/8000 [1:28:54<2:20:37,  1.50s/it]

Episode 2371/8000, real env return = -115.38


 30%|██████████▍                        | 2381/8000 [1:29:10<2:52:10,  1.84s/it]

Episode 2381/8000, real env return = -116.89


 30%|██████████▍                        | 2391/8000 [1:29:29<3:07:29,  2.01s/it]

Episode 2391/8000, real env return = -113.07


 30%|██████████▌                        | 2401/8000 [1:29:43<2:23:20,  1.54s/it]

Episode 2401/8000, real env return = -106.84


 30%|██████████▌                        | 2411/8000 [1:30:01<3:04:12,  1.98s/it]

Episode 2411/8000, real env return = -53.38


 30%|██████████▌                        | 2421/8000 [1:30:19<2:18:26,  1.49s/it]

Episode 2421/8000, real env return = -111.99


 30%|██████████▋                        | 2431/8000 [1:30:36<3:08:39,  2.03s/it]

Episode 2431/8000, real env return = -112.07


 31%|██████████▋                        | 2441/8000 [1:30:53<2:37:05,  1.70s/it]

Episode 2441/8000, real env return = -105.98


 31%|██████████▋                        | 2451/8000 [1:31:09<2:40:33,  1.74s/it]

Episode 2451/8000, real env return = -106.07


 31%|██████████▊                        | 2461/8000 [1:31:25<2:27:04,  1.59s/it]

Episode 2461/8000, real env return = -113.29


 31%|██████████▊                        | 2471/8000 [1:31:45<2:56:35,  1.92s/it]

Episode 2471/8000, real env return = -105.51


 31%|██████████▊                        | 2481/8000 [1:32:04<2:53:08,  1.88s/it]

Episode 2481/8000, real env return = -107.60


 31%|██████████▉                        | 2491/8000 [1:32:19<2:31:24,  1.65s/it]

Episode 2491/8000, real env return = -106.47


 31%|██████████▉                        | 2501/8000 [1:32:36<2:50:15,  1.86s/it]

Episode 2501/8000, real env return = -113.10


 31%|██████████▉                        | 2511/8000 [1:32:50<2:18:51,  1.52s/it]

Episode 2511/8000, real env return = -111.34


 32%|███████████                        | 2521/8000 [1:33:06<1:52:25,  1.23s/it]

Episode 2521/8000, real env return = -106.62


 32%|███████████                        | 2531/8000 [1:33:20<2:04:45,  1.37s/it]

Episode 2531/8000, real env return = -111.25


 32%|███████████                        | 2541/8000 [1:33:37<2:29:49,  1.65s/it]

Episode 2541/8000, real env return = -106.37


 32%|███████████▏                       | 2551/8000 [1:33:53<2:58:15,  1.96s/it]

Episode 2551/8000, real env return = -107.28


 32%|███████████▏                       | 2561/8000 [1:34:10<2:26:54,  1.62s/it]

Episode 2561/8000, real env return = -105.78


 32%|███████████▏                       | 2571/8000 [1:34:25<2:43:35,  1.81s/it]

Episode 2571/8000, real env return = -112.67


 32%|███████████▎                       | 2581/8000 [1:34:40<2:14:35,  1.49s/it]

Episode 2581/8000, real env return = -113.92


 32%|███████████▎                       | 2591/8000 [1:34:53<2:03:51,  1.37s/it]

Episode 2591/8000, real env return = -107.44


 33%|███████████▍                       | 2601/8000 [1:35:10<2:31:08,  1.68s/it]

Episode 2601/8000, real env return = -112.77


 33%|███████████▍                       | 2611/8000 [1:35:25<2:07:58,  1.42s/it]

Episode 2611/8000, real env return = -106.46


 33%|███████████▍                       | 2621/8000 [1:35:42<2:34:04,  1.72s/it]

Episode 2621/8000, real env return = -112.34


 33%|███████████▌                       | 2631/8000 [1:35:57<2:14:30,  1.50s/it]

Episode 2631/8000, real env return = -107.43


 33%|███████████▌                       | 2641/8000 [1:36:13<2:14:19,  1.50s/it]

Episode 2641/8000, real env return = -107.21


 33%|███████████▌                       | 2651/8000 [1:36:28<2:02:00,  1.37s/it]

Episode 2651/8000, real env return = -106.81


 33%|███████████▋                       | 2661/8000 [1:36:44<2:32:39,  1.72s/it]

Episode 2661/8000, real env return = -105.24


 33%|███████████▋                       | 2671/8000 [1:37:01<2:44:27,  1.85s/it]

Episode 2671/8000, real env return = -69.58


 34%|███████████▋                       | 2681/8000 [1:37:20<2:19:46,  1.58s/it]

Episode 2681/8000, real env return = -107.46


 34%|███████████▊                       | 2691/8000 [1:37:36<2:15:12,  1.53s/it]

Episode 2691/8000, real env return = -106.18


 34%|███████████▊                       | 2701/8000 [1:37:53<2:28:24,  1.68s/it]

Episode 2701/8000, real env return = -106.00


 34%|███████████▊                       | 2711/8000 [1:38:07<1:50:40,  1.26s/it]

Episode 2711/8000, real env return = -107.01


 34%|███████████▉                       | 2721/8000 [1:38:24<2:46:45,  1.90s/it]

Episode 2721/8000, real env return = -92.14


 34%|███████████▉                       | 2731/8000 [1:38:43<3:23:22,  2.32s/it]

Episode 2731/8000, real env return = -86.74


 34%|███████████▉                       | 2741/8000 [1:39:07<3:24:02,  2.33s/it]

Episode 2741/8000, real env return = -88.17


 34%|████████████                       | 2751/8000 [1:39:32<3:33:07,  2.44s/it]

Episode 2751/8000, real env return = -48.57


 35%|████████████                       | 2761/8000 [1:39:55<3:30:09,  2.41s/it]

Episode 2761/8000, real env return = -67.14


 35%|████████████                       | 2771/8000 [1:40:17<3:13:26,  2.22s/it]

Episode 2771/8000, real env return = -73.84


 35%|████████████▏                      | 2781/8000 [1:40:38<2:42:57,  1.87s/it]

Episode 2781/8000, real env return = -71.74


 35%|████████████▏                      | 2791/8000 [1:41:02<3:15:08,  2.25s/it]

Episode 2791/8000, real env return = -77.64


 35%|████████████▎                      | 2801/8000 [1:41:25<3:41:15,  2.55s/it]

Episode 2801/8000, real env return = -58.18


 35%|████████████▎                      | 2811/8000 [1:41:45<2:49:03,  1.95s/it]

Episode 2811/8000, real env return = -77.35


 35%|████████████▎                      | 2821/8000 [1:42:05<2:37:09,  1.82s/it]

Episode 2821/8000, real env return = -67.22


 35%|████████████▍                      | 2831/8000 [1:42:26<2:57:10,  2.06s/it]

Episode 2831/8000, real env return = -90.52


 36%|████████████▍                      | 2841/8000 [1:42:48<3:27:35,  2.41s/it]

Episode 2841/8000, real env return = -73.48


 36%|████████████▍                      | 2851/8000 [1:43:09<3:04:57,  2.16s/it]

Episode 2851/8000, real env return = -59.03


 36%|████████████▌                      | 2861/8000 [1:43:32<2:50:13,  1.99s/it]

Episode 2861/8000, real env return = -55.57


 36%|████████████▌                      | 2871/8000 [1:43:51<2:49:37,  1.98s/it]

Episode 2871/8000, real env return = -68.20


 36%|████████████▌                      | 2881/8000 [1:44:14<3:08:43,  2.21s/it]

Episode 2881/8000, real env return = -58.97


 36%|████████████▋                      | 2891/8000 [1:44:39<3:19:02,  2.34s/it]

Episode 2891/8000, real env return = -62.84


 36%|████████████▋                      | 2901/8000 [1:44:58<2:45:24,  1.95s/it]

Episode 2901/8000, real env return = -60.70


 36%|████████████▋                      | 2911/8000 [1:45:20<3:39:29,  2.59s/it]

Episode 2911/8000, real env return = -66.48


 37%|████████████▊                      | 2921/8000 [1:45:44<3:15:04,  2.30s/it]

Episode 2921/8000, real env return = -64.71


 37%|████████████▊                      | 2931/8000 [1:46:08<3:33:44,  2.53s/it]

Episode 2931/8000, real env return = -76.30


 37%|████████████▊                      | 2941/8000 [1:46:30<3:27:37,  2.46s/it]

Episode 2941/8000, real env return = -70.49


 37%|████████████▉                      | 2951/8000 [1:46:51<2:40:58,  1.91s/it]

Episode 2951/8000, real env return = -68.81


 37%|████████████▉                      | 2961/8000 [1:47:12<2:28:50,  1.77s/it]

Episode 2961/8000, real env return = -63.17


 37%|████████████▉                      | 2971/8000 [1:47:34<3:11:26,  2.28s/it]

Episode 2971/8000, real env return = -85.43


 37%|█████████████                      | 2981/8000 [1:47:57<3:08:38,  2.26s/it]

Episode 2981/8000, real env return = -95.22


 37%|█████████████                      | 2991/8000 [1:48:20<2:44:03,  1.97s/it]

Episode 2991/8000, real env return = -89.36


 38%|█████████████▏                     | 3001/8000 [1:48:42<3:29:20,  2.51s/it]

Episode 3001/8000, real env return = -78.06


 38%|█████████████▏                     | 3011/8000 [1:49:05<3:23:53,  2.45s/it]

Episode 3011/8000, real env return = -69.71


 38%|█████████████▏                     | 3021/8000 [1:49:30<3:00:59,  2.18s/it]

Episode 3021/8000, real env return = -31.76


 38%|█████████████▎                     | 3031/8000 [1:49:52<3:14:40,  2.35s/it]

Episode 3031/8000, real env return = 16.85


 38%|█████████████▎                     | 3041/8000 [1:50:16<3:35:38,  2.61s/it]

Episode 3041/8000, real env return = -2.10


 38%|█████████████▎                     | 3051/8000 [1:50:37<2:35:49,  1.89s/it]

Episode 3051/8000, real env return = -124.13


 38%|█████████████▍                     | 3061/8000 [1:51:01<3:03:55,  2.23s/it]

Episode 3061/8000, real env return = 24.12


 38%|█████████████▍                     | 3071/8000 [1:51:25<3:07:44,  2.29s/it]

Episode 3071/8000, real env return = 35.34


 39%|█████████████▍                     | 3081/8000 [1:51:47<2:54:57,  2.13s/it]

Episode 3081/8000, real env return = 40.30


 39%|█████████████▌                     | 3091/8000 [1:52:11<3:09:49,  2.32s/it]

Episode 3091/8000, real env return = -21.66


 39%|█████████████▌                     | 3101/8000 [1:52:36<3:30:53,  2.58s/it]

Episode 3101/8000, real env return = -38.00


 39%|█████████████▌                     | 3111/8000 [1:53:00<3:30:00,  2.58s/it]

Episode 3111/8000, real env return = 36.27


 39%|█████████████▋                     | 3121/8000 [1:53:21<3:04:49,  2.27s/it]

Episode 3121/8000, real env return = 48.89


 39%|█████████████▋                     | 3131/8000 [1:53:43<2:55:50,  2.17s/it]

Episode 3131/8000, real env return = 37.60


 39%|█████████████▋                     | 3141/8000 [1:54:06<2:49:46,  2.10s/it]

Episode 3141/8000, real env return = 58.25


 39%|█████████████▊                     | 3151/8000 [1:54:27<2:39:23,  1.97s/it]

Episode 3151/8000, real env return = 59.22


 40%|█████████████▊                     | 3161/8000 [1:54:52<3:18:00,  2.46s/it]

Episode 3161/8000, real env return = 80.28


 40%|█████████████▊                     | 3171/8000 [1:55:15<3:08:29,  2.34s/it]

Episode 3171/8000, real env return = 64.82


 40%|█████████████▉                     | 3181/8000 [1:55:38<3:04:46,  2.30s/it]

Episode 3181/8000, real env return = 64.34


 40%|█████████████▉                     | 3191/8000 [1:56:02<3:31:55,  2.64s/it]

Episode 3191/8000, real env return = 83.28


 40%|██████████████                     | 3201/8000 [1:56:26<3:22:53,  2.54s/it]

Episode 3201/8000, real env return = 95.60


 40%|██████████████                     | 3211/8000 [1:56:52<3:31:38,  2.65s/it]

Episode 3211/8000, real env return = 106.74


 40%|██████████████                     | 3221/8000 [1:57:14<3:04:01,  2.31s/it]

Episode 3221/8000, real env return = 119.39


 40%|██████████████▏                    | 3231/8000 [1:57:36<2:57:50,  2.24s/it]

Episode 3231/8000, real env return = 125.36


 41%|██████████████▏                    | 3241/8000 [1:58:01<3:17:00,  2.48s/it]

Episode 3241/8000, real env return = 107.54


 41%|██████████████▏                    | 3251/8000 [1:58:23<3:10:36,  2.41s/it]

Episode 3251/8000, real env return = 92.98


 41%|██████████████▎                    | 3261/8000 [1:58:44<2:21:15,  1.79s/it]

Episode 3261/8000, real env return = 87.11


 41%|██████████████▎                    | 3271/8000 [1:59:07<2:58:24,  2.26s/it]

Episode 3271/8000, real env return = 57.11


 41%|██████████████▎                    | 3281/8000 [1:59:29<2:46:47,  2.12s/it]

Episode 3281/8000, real env return = 55.80


 41%|██████████████▍                    | 3291/8000 [1:59:52<3:01:56,  2.32s/it]

Episode 3291/8000, real env return = 75.84


 41%|██████████████▍                    | 3301/8000 [2:00:18<3:39:45,  2.81s/it]

Episode 3301/8000, real env return = 85.30


 41%|██████████████▍                    | 3311/8000 [2:00:43<3:00:39,  2.31s/it]

Episode 3311/8000, real env return = 102.90


 42%|██████████████▌                    | 3321/8000 [2:01:06<3:07:39,  2.41s/it]

Episode 3321/8000, real env return = 105.12


 42%|██████████████▌                    | 3331/8000 [2:01:28<2:57:18,  2.28s/it]

Episode 3331/8000, real env return = 118.14


 42%|██████████████▌                    | 3341/8000 [2:01:50<2:49:50,  2.19s/it]

Episode 3341/8000, real env return = 127.86


 42%|██████████████▋                    | 3351/8000 [2:02:16<3:08:37,  2.43s/it]

Episode 3351/8000, real env return = 116.79


 42%|██████████████▋                    | 3361/8000 [2:02:38<3:12:57,  2.50s/it]

Episode 3361/8000, real env return = 108.96


 42%|██████████████▋                    | 3371/8000 [2:03:04<3:37:07,  2.81s/it]

Episode 3371/8000, real env return = 120.80


 42%|██████████████▊                    | 3381/8000 [2:03:28<3:06:00,  2.42s/it]

Episode 3381/8000, real env return = 136.08


 42%|██████████████▊                    | 3391/8000 [2:03:49<2:59:30,  2.34s/it]

Episode 3391/8000, real env return = 125.95


 43%|██████████████▉                    | 3401/8000 [2:04:11<2:34:20,  2.01s/it]

Episode 3401/8000, real env return = 112.12


 43%|██████████████▉                    | 3411/8000 [2:04:35<2:31:36,  1.98s/it]

Episode 3411/8000, real env return = 112.66


 43%|██████████████▉                    | 3421/8000 [2:04:58<2:51:22,  2.25s/it]

Episode 3421/8000, real env return = 120.78


 43%|███████████████                    | 3431/8000 [2:05:24<3:15:18,  2.56s/it]

Episode 3431/8000, real env return = 127.74


 43%|███████████████                    | 3441/8000 [2:05:49<3:22:37,  2.67s/it]

Episode 3441/8000, real env return = 101.92


 43%|███████████████                    | 3451/8000 [2:06:15<3:31:23,  2.79s/it]

Episode 3451/8000, real env return = 148.65


 43%|███████████████▏                   | 3461/8000 [2:06:38<2:59:13,  2.37s/it]

Episode 3461/8000, real env return = 147.24


 43%|███████████████▏                   | 3471/8000 [2:07:01<3:05:22,  2.46s/it]

Episode 3471/8000, real env return = 133.47


 44%|███████████████▏                   | 3481/8000 [2:07:25<2:52:31,  2.29s/it]

Episode 3481/8000, real env return = 141.26


 44%|███████████████▎                   | 3491/8000 [2:07:49<2:50:59,  2.28s/it]

Episode 3491/8000, real env return = 124.22


 44%|███████████████▎                   | 3501/8000 [2:08:13<2:48:39,  2.25s/it]

Episode 3501/8000, real env return = 150.61


 44%|███████████████▎                   | 3511/8000 [2:08:38<3:19:36,  2.67s/it]

Episode 3511/8000, real env return = 137.86


 44%|███████████████▍                   | 3521/8000 [2:09:03<3:13:32,  2.59s/it]

Episode 3521/8000, real env return = 119.52


 44%|███████████████▍                   | 3531/8000 [2:09:29<3:03:22,  2.46s/it]

Episode 3531/8000, real env return = 129.30


 44%|███████████████▍                   | 3541/8000 [2:09:52<2:59:15,  2.41s/it]

Episode 3541/8000, real env return = 127.80


 44%|███████████████▌                   | 3551/8000 [2:10:15<2:40:49,  2.17s/it]

Episode 3551/8000, real env return = 150.76


 45%|███████████████▌                   | 3561/8000 [2:10:38<2:25:23,  1.97s/it]

Episode 3561/8000, real env return = 142.71


 45%|███████████████▌                   | 3571/8000 [2:11:03<3:08:18,  2.55s/it]

Episode 3571/8000, real env return = 141.03


 45%|███████████████▋                   | 3581/8000 [2:11:26<2:40:15,  2.18s/it]

Episode 3581/8000, real env return = 136.84


 45%|███████████████▋                   | 3591/8000 [2:11:53<3:10:41,  2.60s/it]

Episode 3591/8000, real env return = 141.79


 45%|███████████████▊                   | 3601/8000 [2:12:16<2:48:14,  2.29s/it]

Episode 3601/8000, real env return = 167.27


 45%|███████████████▊                   | 3611/8000 [2:12:40<2:34:09,  2.11s/it]

Episode 3611/8000, real env return = 181.83


 45%|███████████████▊                   | 3621/8000 [2:13:01<2:35:23,  2.13s/it]

Episode 3621/8000, real env return = 175.49


 45%|███████████████▉                   | 3631/8000 [2:13:24<2:40:47,  2.21s/it]

Episode 3631/8000, real env return = 217.48


 46%|███████████████▉                   | 3641/8000 [2:13:48<3:22:22,  2.79s/it]

Episode 3641/8000, real env return = 165.96


 46%|███████████████▉                   | 3651/8000 [2:14:13<2:56:33,  2.44s/it]

Episode 3651/8000, real env return = 158.30


 46%|████████████████                   | 3661/8000 [2:14:36<2:43:50,  2.27s/it]

Episode 3661/8000, real env return = 152.61


 46%|████████████████                   | 3671/8000 [2:15:00<2:52:39,  2.39s/it]

Episode 3671/8000, real env return = 135.84


 46%|████████████████                   | 3681/8000 [2:15:27<2:55:30,  2.44s/it]

Episode 3681/8000, real env return = 170.43


 46%|████████████████▏                  | 3691/8000 [2:15:49<2:51:54,  2.39s/it]

Episode 3691/8000, real env return = 133.77


 46%|████████████████▏                  | 3701/8000 [2:16:15<2:53:34,  2.42s/it]

Episode 3701/8000, real env return = 150.43


 46%|████████████████▏                  | 3711/8000 [2:16:42<3:05:17,  2.59s/it]

Episode 3711/8000, real env return = 156.43


 47%|████████████████▎                  | 3721/8000 [2:17:06<2:54:36,  2.45s/it]

Episode 3721/8000, real env return = 188.64


 47%|████████████████▎                  | 3731/8000 [2:17:30<2:50:49,  2.40s/it]

Episode 3731/8000, real env return = 179.12


 47%|████████████████▎                  | 3741/8000 [2:17:55<2:57:41,  2.50s/it]

Episode 3741/8000, real env return = 166.91


 47%|████████████████▍                  | 3751/8000 [2:18:19<3:06:58,  2.64s/it]

Episode 3751/8000, real env return = 215.82


 47%|████████████████▍                  | 3761/8000 [2:18:44<3:11:34,  2.71s/it]

Episode 3761/8000, real env return = 170.91


 47%|████████████████▍                  | 3771/8000 [2:19:08<2:50:44,  2.42s/it]

Episode 3771/8000, real env return = 187.71


 47%|████████████████▌                  | 3781/8000 [2:19:35<3:04:14,  2.62s/it]

Episode 3781/8000, real env return = 159.67


 47%|████████████████▌                  | 3791/8000 [2:20:01<3:25:58,  2.94s/it]

Episode 3791/8000, real env return = 172.41


 48%|████████████████▋                  | 3801/8000 [2:20:27<3:03:47,  2.63s/it]

Episode 3801/8000, real env return = 189.97


 48%|████████████████▋                  | 3811/8000 [2:20:52<3:09:48,  2.72s/it]

Episode 3811/8000, real env return = 163.57


 48%|████████████████▋                  | 3821/8000 [2:21:16<2:57:27,  2.55s/it]

Episode 3821/8000, real env return = 179.04


 48%|████████████████▊                  | 3831/8000 [2:21:41<2:44:14,  2.36s/it]

Episode 3831/8000, real env return = 158.81


 48%|████████████████▊                  | 3841/8000 [2:22:06<3:04:49,  2.67s/it]

Episode 3841/8000, real env return = 144.47


 48%|████████████████▊                  | 3851/8000 [2:22:33<2:43:21,  2.36s/it]

Episode 3851/8000, real env return = 162.92


 48%|████████████████▉                  | 3861/8000 [2:22:58<2:58:05,  2.58s/it]

Episode 3861/8000, real env return = 158.34


 48%|████████████████▉                  | 3871/8000 [2:23:23<3:14:40,  2.83s/it]

Episode 3871/8000, real env return = 223.73


 49%|████████████████▉                  | 3881/8000 [2:23:47<2:50:40,  2.49s/it]

Episode 3881/8000, real env return = 233.23


 49%|█████████████████                  | 3891/8000 [2:24:11<2:46:30,  2.43s/it]

Episode 3891/8000, real env return = 255.55


 49%|█████████████████                  | 3901/8000 [2:24:32<2:31:18,  2.21s/it]

Episode 3901/8000, real env return = 262.17


 49%|█████████████████                  | 3911/8000 [2:25:00<3:02:31,  2.68s/it]

Episode 3911/8000, real env return = 258.05


 49%|█████████████████▏                 | 3921/8000 [2:25:23<2:23:43,  2.11s/it]

Episode 3921/8000, real env return = 260.35


 49%|█████████████████▏                 | 3931/8000 [2:25:47<2:32:10,  2.24s/it]

Episode 3931/8000, real env return = 255.93


 49%|█████████████████▏                 | 3941/8000 [2:26:12<2:55:47,  2.60s/it]

Episode 3941/8000, real env return = 257.37


 49%|█████████████████▎                 | 3951/8000 [2:26:34<2:20:57,  2.09s/it]

Episode 3951/8000, real env return = 267.34


 50%|█████████████████▎                 | 3961/8000 [2:27:00<2:45:23,  2.46s/it]

Episode 3961/8000, real env return = 260.42


 50%|█████████████████▎                 | 3971/8000 [2:27:25<2:45:27,  2.46s/it]

Episode 3971/8000, real env return = 257.04


 50%|█████████████████▍                 | 3981/8000 [2:27:50<2:48:59,  2.52s/it]

Episode 3981/8000, real env return = 264.32


 50%|█████████████████▍                 | 3991/8000 [2:28:14<2:45:39,  2.48s/it]

Episode 3991/8000, real env return = 267.11


 50%|█████████████████▌                 | 4001/8000 [2:28:37<2:36:41,  2.35s/it]

Episode 4001/8000, real env return = 273.29


 50%|█████████████████▌                 | 4011/8000 [2:29:00<2:42:31,  2.44s/it]

Episode 4011/8000, real env return = 275.56


 50%|█████████████████▌                 | 4021/8000 [2:29:22<2:33:15,  2.31s/it]

Episode 4021/8000, real env return = 272.93


 50%|█████████████████▋                 | 4031/8000 [2:29:44<2:33:07,  2.31s/it]

Episode 4031/8000, real env return = 271.81


 51%|█████████████████▋                 | 4041/8000 [2:30:07<2:23:07,  2.17s/it]

Episode 4041/8000, real env return = 273.52


 51%|█████████████████▋                 | 4051/8000 [2:30:30<2:27:33,  2.24s/it]

Episode 4051/8000, real env return = 12.44


 51%|█████████████████▊                 | 4061/8000 [2:30:53<2:28:59,  2.27s/it]

Episode 4061/8000, real env return = 282.22


 51%|█████████████████▊                 | 4071/8000 [2:31:15<2:33:50,  2.35s/it]

Episode 4071/8000, real env return = 276.03


 51%|█████████████████▊                 | 4081/8000 [2:31:38<2:44:47,  2.52s/it]

Episode 4081/8000, real env return = 271.97


 51%|█████████████████▉                 | 4091/8000 [2:32:04<2:52:39,  2.65s/it]

Episode 4091/8000, real env return = 280.05


 51%|█████████████████▉                 | 4101/8000 [2:32:28<2:30:16,  2.31s/it]

Episode 4101/8000, real env return = 281.44


 51%|█████████████████▉                 | 4111/8000 [2:32:51<2:34:54,  2.39s/it]

Episode 4111/8000, real env return = 282.47


 52%|██████████████████                 | 4121/8000 [2:33:14<2:16:47,  2.12s/it]

Episode 4121/8000, real env return = 280.42


 52%|██████████████████                 | 4131/8000 [2:33:36<2:22:29,  2.21s/it]

Episode 4131/8000, real env return = 284.11


 52%|██████████████████                 | 4141/8000 [2:33:58<2:17:21,  2.14s/it]

Episode 4141/8000, real env return = 276.26


 52%|██████████████████▏                | 4151/8000 [2:34:24<2:40:12,  2.50s/it]

Episode 4151/8000, real env return = 280.97


 52%|██████████████████▏                | 4161/8000 [2:34:48<2:37:05,  2.46s/it]

Episode 4161/8000, real env return = 288.56


 52%|██████████████████▏                | 4171/8000 [2:35:12<2:44:45,  2.58s/it]

Episode 4171/8000, real env return = 276.51


 52%|██████████████████▎                | 4181/8000 [2:35:33<2:14:09,  2.11s/it]

Episode 4181/8000, real env return = 282.17


 52%|██████████████████▎                | 4191/8000 [2:35:57<2:22:29,  2.24s/it]

Episode 4191/8000, real env return = 282.05


 53%|██████████████████▍                | 4201/8000 [2:36:18<2:03:22,  1.95s/it]

Episode 4201/8000, real env return = 279.40


 53%|██████████████████▍                | 4211/8000 [2:36:39<2:14:21,  2.13s/it]

Episode 4211/8000, real env return = 284.87


 53%|██████████████████▍                | 4221/8000 [2:37:02<2:49:32,  2.69s/it]

Episode 4221/8000, real env return = 286.21


 53%|██████████████████▌                | 4231/8000 [2:37:25<2:22:40,  2.27s/it]

Episode 4231/8000, real env return = 283.86


 53%|██████████████████▌                | 4241/8000 [2:37:47<2:28:30,  2.37s/it]

Episode 4241/8000, real env return = 284.91


 53%|██████████████████▌                | 4251/8000 [2:38:07<1:52:59,  1.81s/it]

Episode 4251/8000, real env return = -59.77


 53%|██████████████████▋                | 4261/8000 [2:38:27<1:54:57,  1.84s/it]

Episode 4261/8000, real env return = -59.89


 53%|██████████████████▋                | 4271/8000 [2:38:48<2:07:32,  2.05s/it]

Episode 4271/8000, real env return = 290.11


 54%|██████████████████▋                | 4281/8000 [2:39:09<2:00:58,  1.95s/it]

Episode 4281/8000, real env return = 289.89


 54%|██████████████████▊                | 4291/8000 [2:39:31<2:09:16,  2.09s/it]

Episode 4291/8000, real env return = 289.77


 54%|██████████████████▊                | 4301/8000 [2:39:53<2:07:46,  2.07s/it]

Episode 4301/8000, real env return = 291.99


 54%|██████████████████▊                | 4311/8000 [2:40:16<2:31:57,  2.47s/it]

Episode 4311/8000, real env return = 284.78


 54%|██████████████████▉                | 4321/8000 [2:40:38<2:27:25,  2.40s/it]

Episode 4321/8000, real env return = 285.78


 54%|██████████████████▉                | 4331/8000 [2:40:59<2:12:56,  2.17s/it]

Episode 4331/8000, real env return = 270.63


 54%|██████████████████▉                | 4341/8000 [2:41:23<2:21:57,  2.33s/it]

Episode 4341/8000, real env return = 291.11


 54%|███████████████████                | 4351/8000 [2:41:46<2:19:48,  2.30s/it]

Episode 4351/8000, real env return = 288.46


 55%|███████████████████                | 4361/8000 [2:42:08<2:12:20,  2.18s/it]

Episode 4361/8000, real env return = 288.12


 55%|███████████████████                | 4371/8000 [2:42:29<2:23:33,  2.37s/it]

Episode 4371/8000, real env return = 254.51


 55%|███████████████████▏               | 4381/8000 [2:42:52<2:16:04,  2.26s/it]

Episode 4381/8000, real env return = 291.96


 55%|███████████████████▏               | 4391/8000 [2:43:16<2:26:56,  2.44s/it]

Episode 4391/8000, real env return = 294.84


 55%|███████████████████▎               | 4401/8000 [2:43:37<2:04:03,  2.07s/it]

Episode 4401/8000, real env return = 287.23


 55%|███████████████████▎               | 4411/8000 [2:43:57<2:09:01,  2.16s/it]

Episode 4411/8000, real env return = 291.14


 55%|███████████████████▎               | 4421/8000 [2:44:21<2:06:22,  2.12s/it]

Episode 4421/8000, real env return = 287.87


 55%|███████████████████▍               | 4431/8000 [2:44:44<2:20:46,  2.37s/it]

Episode 4431/8000, real env return = 291.08


 56%|███████████████████▍               | 4441/8000 [2:45:05<2:02:28,  2.06s/it]

Episode 4441/8000, real env return = 291.36


 56%|███████████████████▍               | 4451/8000 [2:45:30<2:33:39,  2.60s/it]

Episode 4451/8000, real env return = 292.06


 56%|███████████████████▌               | 4461/8000 [2:45:53<2:27:20,  2.50s/it]

Episode 4461/8000, real env return = 289.63


 56%|███████████████████▌               | 4471/8000 [2:46:17<2:27:46,  2.51s/it]

Episode 4471/8000, real env return = 292.71


 56%|███████████████████▌               | 4481/8000 [2:46:39<2:07:58,  2.18s/it]

Episode 4481/8000, real env return = 287.23


 56%|███████████████████▋               | 4491/8000 [2:47:00<2:08:36,  2.20s/it]

Episode 4491/8000, real env return = 291.82


 56%|███████████████████▋               | 4501/8000 [2:47:21<2:00:58,  2.07s/it]

Episode 4501/8000, real env return = 291.02


 56%|███████████████████▋               | 4511/8000 [2:47:41<1:43:06,  1.77s/it]

Episode 4511/8000, real env return = 23.63


 57%|███████████████████▊               | 4521/8000 [2:48:03<2:07:33,  2.20s/it]

Episode 4521/8000, real env return = 291.39


 57%|███████████████████▊               | 4531/8000 [2:48:23<1:54:54,  1.99s/it]

Episode 4531/8000, real env return = 295.47


 57%|███████████████████▊               | 4541/8000 [2:48:46<2:10:05,  2.26s/it]

Episode 4541/8000, real env return = 296.27


 57%|███████████████████▉               | 4551/8000 [2:49:09<2:18:37,  2.41s/it]

Episode 4551/8000, real env return = 292.80


 57%|███████████████████▉               | 4561/8000 [2:49:32<2:20:20,  2.45s/it]

Episode 4561/8000, real env return = 295.21


 57%|███████████████████▉               | 4571/8000 [2:49:53<2:08:48,  2.25s/it]

Episode 4571/8000, real env return = 297.66


 57%|████████████████████               | 4581/8000 [2:50:15<1:54:24,  2.01s/it]

Episode 4581/8000, real env return = 292.87


 57%|████████████████████               | 4591/8000 [2:50:38<2:19:31,  2.46s/it]

Episode 4591/8000, real env return = 296.98


 58%|████████████████████▏              | 4601/8000 [2:50:59<1:56:15,  2.05s/it]

Episode 4601/8000, real env return = 292.16


 58%|████████████████████▏              | 4611/8000 [2:51:22<2:12:21,  2.34s/it]

Episode 4611/8000, real env return = 290.61


 58%|████████████████████▏              | 4621/8000 [2:51:43<2:00:46,  2.14s/it]

Episode 4621/8000, real env return = 294.64


 58%|████████████████████▎              | 4631/8000 [2:52:02<1:40:15,  1.79s/it]

Episode 4631/8000, real env return = 294.56


 58%|████████████████████▎              | 4641/8000 [2:52:24<2:04:36,  2.23s/it]

Episode 4641/8000, real env return = 292.23


 58%|████████████████████▎              | 4651/8000 [2:52:45<1:52:20,  2.01s/it]

Episode 4651/8000, real env return = 294.21


 58%|████████████████████▍              | 4661/8000 [2:53:08<2:06:00,  2.26s/it]

Episode 4661/8000, real env return = 292.25


 58%|████████████████████▍              | 4671/8000 [2:53:31<2:07:29,  2.30s/it]

Episode 4671/8000, real env return = 293.04


 59%|████████████████████▍              | 4681/8000 [2:53:52<1:56:52,  2.11s/it]

Episode 4681/8000, real env return = 287.51


 59%|████████████████████▌              | 4691/8000 [2:54:12<1:59:49,  2.17s/it]

Episode 4691/8000, real env return = 294.48


 59%|████████████████████▌              | 4701/8000 [2:54:35<1:58:35,  2.16s/it]

Episode 4701/8000, real env return = 286.71


 59%|████████████████████▌              | 4711/8000 [2:54:58<2:18:00,  2.52s/it]

Episode 4711/8000, real env return = 281.76


 59%|████████████████████▋              | 4721/8000 [2:55:20<1:58:20,  2.17s/it]

Episode 4721/8000, real env return = 291.47


 59%|████████████████████▋              | 4731/8000 [2:55:44<2:16:11,  2.50s/it]

Episode 4731/8000, real env return = 287.84


 59%|████████████████████▋              | 4741/8000 [2:56:06<1:55:45,  2.13s/it]

Episode 4741/8000, real env return = 288.92


 59%|████████████████████▊              | 4751/8000 [2:56:28<2:16:47,  2.53s/it]

Episode 4751/8000, real env return = 293.43


 60%|████████████████████▊              | 4761/8000 [2:56:50<2:01:49,  2.26s/it]

Episode 4761/8000, real env return = 293.11


 60%|████████████████████▊              | 4771/8000 [2:57:12<1:50:54,  2.06s/it]

Episode 4771/8000, real env return = 292.05


 60%|████████████████████▉              | 4781/8000 [2:57:34<1:56:10,  2.17s/it]

Episode 4781/8000, real env return = 286.00


 60%|████████████████████▉              | 4791/8000 [2:57:59<2:07:40,  2.39s/it]

Episode 4791/8000, real env return = 292.42


 60%|█████████████████████              | 4801/8000 [2:58:19<1:41:59,  1.91s/it]

Episode 4801/8000, real env return = 138.73


 60%|█████████████████████              | 4811/8000 [2:58:39<1:42:45,  1.93s/it]

Episode 4811/8000, real env return = 103.82


 60%|█████████████████████              | 4821/8000 [2:59:01<1:48:59,  2.06s/it]

Episode 4821/8000, real env return = 290.73


 60%|█████████████████████▏             | 4831/8000 [2:59:24<1:57:38,  2.23s/it]

Episode 4831/8000, real env return = 292.29


 61%|█████████████████████▏             | 4841/8000 [2:59:45<1:32:36,  1.76s/it]

Episode 4841/8000, real env return = 295.64


 61%|█████████████████████▏             | 4851/8000 [3:00:06<1:46:32,  2.03s/it]

Episode 4851/8000, real env return = 291.15


 61%|█████████████████████▎             | 4861/8000 [3:00:27<1:53:07,  2.16s/it]

Episode 4861/8000, real env return = 295.78


 61%|█████████████████████▎             | 4871/8000 [3:00:47<1:51:18,  2.13s/it]

Episode 4871/8000, real env return = 289.73


 61%|█████████████████████▎             | 4881/8000 [3:01:10<1:50:31,  2.13s/it]

Episode 4881/8000, real env return = 294.41


 61%|█████████████████████▍             | 4891/8000 [3:01:33<1:50:15,  2.13s/it]

Episode 4891/8000, real env return = 289.72


 61%|█████████████████████▍             | 4901/8000 [3:01:55<1:57:06,  2.27s/it]

Episode 4901/8000, real env return = 295.27


 61%|█████████████████████▍             | 4911/8000 [3:02:15<1:54:57,  2.23s/it]

Episode 4911/8000, real env return = 292.54


 62%|█████████████████████▌             | 4921/8000 [3:02:38<2:04:14,  2.42s/it]

Episode 4921/8000, real env return = 292.43


 62%|█████████████████████▌             | 4931/8000 [3:03:00<1:55:17,  2.25s/it]

Episode 4931/8000, real env return = 294.81


 62%|█████████████████████▌             | 4941/8000 [3:03:20<1:40:33,  1.97s/it]

Episode 4941/8000, real env return = 296.21


 62%|█████████████████████▋             | 4951/8000 [3:03:41<1:50:12,  2.17s/it]

Episode 4951/8000, real env return = 270.21


 62%|█████████████████████▋             | 4961/8000 [3:04:03<1:47:53,  2.13s/it]

Episode 4961/8000, real env return = 287.14


 62%|█████████████████████▋             | 4971/8000 [3:04:26<1:49:23,  2.17s/it]

Episode 4971/8000, real env return = 288.16


 62%|█████████████████████▊             | 4981/8000 [3:04:47<1:58:25,  2.35s/it]

Episode 4981/8000, real env return = 293.84


 62%|█████████████████████▊             | 4991/8000 [3:05:10<2:01:01,  2.41s/it]

Episode 4991/8000, real env return = -14.15


 63%|█████████████████████▉             | 5001/8000 [3:05:32<1:56:43,  2.34s/it]

Episode 5001/8000, real env return = 298.47


 63%|█████████████████████▉             | 5011/8000 [3:05:55<1:48:31,  2.18s/it]

Episode 5011/8000, real env return = 292.08


 63%|█████████████████████▉             | 5021/8000 [3:06:19<2:06:28,  2.55s/it]

Episode 5021/8000, real env return = 293.90


 63%|██████████████████████             | 5031/8000 [3:06:40<1:54:01,  2.30s/it]

Episode 5031/8000, real env return = 295.45


 63%|██████████████████████             | 5041/8000 [3:07:01<1:43:44,  2.10s/it]

Episode 5041/8000, real env return = 290.32


 63%|██████████████████████             | 5051/8000 [3:07:21<1:42:31,  2.09s/it]

Episode 5051/8000, real env return = 295.03


 63%|██████████████████████▏            | 5061/8000 [3:07:45<1:58:50,  2.43s/it]

Episode 5061/8000, real env return = 296.71


 63%|██████████████████████▏            | 5071/8000 [3:08:11<2:10:58,  2.68s/it]

Episode 5071/8000, real env return = 297.53


 64%|██████████████████████▏            | 5081/8000 [3:08:32<1:47:26,  2.21s/it]

Episode 5081/8000, real env return = 295.24


 64%|██████████████████████▎            | 5091/8000 [3:08:55<1:39:38,  2.06s/it]

Episode 5091/8000, real env return = 291.85


 64%|██████████████████████▎            | 5101/8000 [3:09:14<1:40:44,  2.09s/it]

Episode 5101/8000, real env return = 298.13


 64%|██████████████████████▎            | 5111/8000 [3:09:34<1:41:21,  2.10s/it]

Episode 5111/8000, real env return = 299.30


 64%|██████████████████████▍            | 5121/8000 [3:09:56<1:49:06,  2.27s/it]

Episode 5121/8000, real env return = 298.39


 64%|██████████████████████▍            | 5131/8000 [3:10:18<1:51:44,  2.34s/it]

Episode 5131/8000, real env return = 294.06


 64%|██████████████████████▍            | 5141/8000 [3:10:41<1:55:31,  2.42s/it]

Episode 5141/8000, real env return = 295.62


 64%|██████████████████████▌            | 5151/8000 [3:11:04<1:58:48,  2.50s/it]

Episode 5151/8000, real env return = 297.35


 65%|██████████████████████▌            | 5161/8000 [3:11:26<1:48:39,  2.30s/it]

Episode 5161/8000, real env return = 299.01


 65%|██████████████████████▌            | 5171/8000 [3:11:50<1:47:05,  2.27s/it]

Episode 5171/8000, real env return = 293.39


 65%|██████████████████████▋            | 5181/8000 [3:12:12<1:43:30,  2.20s/it]

Episode 5181/8000, real env return = 295.22


 65%|██████████████████████▋            | 5191/8000 [3:12:34<1:44:51,  2.24s/it]

Episode 5191/8000, real env return = 298.69


 65%|██████████████████████▊            | 5201/8000 [3:12:56<1:44:00,  2.23s/it]

Episode 5201/8000, real env return = 296.30


 65%|██████████████████████▊            | 5211/8000 [3:13:18<1:44:56,  2.26s/it]

Episode 5211/8000, real env return = 299.67


 65%|██████████████████████▊            | 5221/8000 [3:13:42<1:54:37,  2.47s/it]

Episode 5221/8000, real env return = 299.96


 65%|██████████████████████▉            | 5231/8000 [3:14:03<1:38:45,  2.14s/it]

Episode 5231/8000, real env return = 297.89


 66%|██████████████████████▉            | 5241/8000 [3:14:25<1:43:13,  2.24s/it]

Episode 5241/8000, real env return = 295.22


 66%|██████████████████████▉            | 5251/8000 [3:14:49<1:42:35,  2.24s/it]

Episode 5251/8000, real env return = 296.08


 66%|███████████████████████            | 5261/8000 [3:15:09<1:30:49,  1.99s/it]

Episode 5261/8000, real env return = 289.67


 66%|███████████████████████            | 5271/8000 [3:15:30<1:29:33,  1.97s/it]

Episode 5271/8000, real env return = 296.07


 66%|███████████████████████            | 5281/8000 [3:15:50<1:39:07,  2.19s/it]

Episode 5281/8000, real env return = 291.40


 66%|███████████████████████▏           | 5291/8000 [3:16:11<1:36:50,  2.14s/it]

Episode 5291/8000, real env return = 92.35


 66%|███████████████████████▏           | 5301/8000 [3:16:33<1:43:28,  2.30s/it]

Episode 5301/8000, real env return = 295.30


 66%|███████████████████████▏           | 5311/8000 [3:16:53<1:33:14,  2.08s/it]

Episode 5311/8000, real env return = 295.99


 67%|███████████████████████▎           | 5321/8000 [3:17:14<1:35:28,  2.14s/it]

Episode 5321/8000, real env return = 293.25


 67%|███████████████████████▎           | 5331/8000 [3:17:37<1:25:46,  1.93s/it]

Episode 5331/8000, real env return = 298.77


 67%|███████████████████████▎           | 5341/8000 [3:17:57<1:30:21,  2.04s/it]

Episode 5341/8000, real env return = 106.51


 67%|███████████████████████▍           | 5351/8000 [3:18:20<1:42:19,  2.32s/it]

Episode 5351/8000, real env return = 298.38


 67%|███████████████████████▍           | 5361/8000 [3:18:43<1:34:13,  2.14s/it]

Episode 5361/8000, real env return = 299.13


 67%|███████████████████████▍           | 5371/8000 [3:19:02<1:19:29,  1.81s/it]

Episode 5371/8000, real env return = 301.37


 67%|███████████████████████▌           | 5381/8000 [3:19:24<1:30:01,  2.06s/it]

Episode 5381/8000, real env return = 299.99


 67%|███████████████████████▌           | 5391/8000 [3:19:47<1:39:18,  2.28s/it]

Episode 5391/8000, real env return = 300.87


 68%|███████████████████████▋           | 5401/8000 [3:20:09<1:29:41,  2.07s/it]

Episode 5401/8000, real env return = 300.99


 68%|███████████████████████▋           | 5411/8000 [3:20:28<1:21:12,  1.88s/it]

Episode 5411/8000, real env return = 302.41


 68%|███████████████████████▋           | 5421/8000 [3:20:49<1:34:15,  2.19s/it]

Episode 5421/8000, real env return = 303.21


 68%|███████████████████████▊           | 5431/8000 [3:21:09<1:13:51,  1.72s/it]

Episode 5431/8000, real env return = 301.03


 68%|███████████████████████▊           | 5441/8000 [3:21:31<1:39:54,  2.34s/it]

Episode 5441/8000, real env return = 300.82


 68%|███████████████████████▊           | 5451/8000 [3:21:52<1:31:00,  2.14s/it]

Episode 5451/8000, real env return = 301.11


 68%|███████████████████████▉           | 5461/8000 [3:22:11<1:17:30,  1.83s/it]

Episode 5461/8000, real env return = 301.02


 68%|███████████████████████▉           | 5471/8000 [3:22:31<1:22:15,  1.95s/it]

Episode 5471/8000, real env return = 299.24


 69%|███████████████████████▉           | 5481/8000 [3:22:52<1:31:48,  2.19s/it]

Episode 5481/8000, real env return = 299.95


 69%|████████████████████████           | 5491/8000 [3:23:13<1:35:42,  2.29s/it]

Episode 5491/8000, real env return = 301.66


 69%|████████████████████████           | 5501/8000 [3:23:36<1:36:32,  2.32s/it]

Episode 5501/8000, real env return = 300.91


 69%|████████████████████████           | 5511/8000 [3:24:00<1:41:59,  2.46s/it]

Episode 5511/8000, real env return = 302.08


 69%|████████████████████████▏          | 5521/8000 [3:24:20<1:26:20,  2.09s/it]

Episode 5521/8000, real env return = 303.15


 69%|████████████████████████▏          | 5531/8000 [3:24:43<1:29:53,  2.18s/it]

Episode 5531/8000, real env return = 300.95


 69%|████████████████████████▏          | 5541/8000 [3:25:04<1:26:18,  2.11s/it]

Episode 5541/8000, real env return = 299.55


 69%|████████████████████████▎          | 5551/8000 [3:25:24<1:31:10,  2.23s/it]

Episode 5551/8000, real env return = 297.22


 70%|████████████████████████▎          | 5561/8000 [3:25:45<1:26:18,  2.12s/it]

Episode 5561/8000, real env return = 299.62


 70%|████████████████████████▎          | 5571/8000 [3:26:07<1:30:28,  2.24s/it]

Episode 5571/8000, real env return = 300.09


 70%|████████████████████████▍          | 5581/8000 [3:26:26<1:22:48,  2.05s/it]

Episode 5581/8000, real env return = 300.10


 70%|████████████████████████▍          | 5591/8000 [3:26:46<1:20:18,  2.00s/it]

Episode 5591/8000, real env return = 301.88


 70%|████████████████████████▌          | 5601/8000 [3:27:06<1:24:05,  2.10s/it]

Episode 5601/8000, real env return = 298.66


 70%|████████████████████████▌          | 5611/8000 [3:27:27<1:16:42,  1.93s/it]

Episode 5611/8000, real env return = 301.58


 70%|████████████████████████▌          | 5621/8000 [3:27:48<1:21:08,  2.05s/it]

Episode 5621/8000, real env return = 298.51


 70%|████████████████████████▋          | 5631/8000 [3:28:10<1:32:53,  2.35s/it]

Episode 5631/8000, real env return = 298.57


 71%|████████████████████████▋          | 5641/8000 [3:28:32<1:25:11,  2.17s/it]

Episode 5641/8000, real env return = 295.46


 71%|████████████████████████▋          | 5651/8000 [3:28:53<1:28:05,  2.25s/it]

Episode 5651/8000, real env return = 295.30


 71%|████████████████████████▊          | 5661/8000 [3:29:15<1:22:43,  2.12s/it]

Episode 5661/8000, real env return = 296.40


 71%|████████████████████████▊          | 5671/8000 [3:29:37<1:23:56,  2.16s/it]

Episode 5671/8000, real env return = 296.84


 71%|████████████████████████▊          | 5681/8000 [3:30:02<1:24:59,  2.20s/it]

Episode 5681/8000, real env return = 295.34


 71%|████████████████████████▉          | 5691/8000 [3:30:22<1:20:51,  2.10s/it]

Episode 5691/8000, real env return = 295.79


 71%|████████████████████████▉          | 5701/8000 [3:30:42<1:18:52,  2.06s/it]

Episode 5701/8000, real env return = 287.55


 71%|████████████████████████▉          | 5711/8000 [3:31:05<1:25:20,  2.24s/it]

Episode 5711/8000, real env return = 302.76


 72%|█████████████████████████          | 5721/8000 [3:31:27<1:17:20,  2.04s/it]

Episode 5721/8000, real env return = 298.42


 72%|█████████████████████████          | 5731/8000 [3:31:51<1:33:33,  2.47s/it]

Episode 5731/8000, real env return = 288.08


 72%|█████████████████████████          | 5741/8000 [3:32:13<1:10:51,  1.88s/it]

Episode 5741/8000, real env return = 299.94


 72%|█████████████████████████▏         | 5751/8000 [3:32:34<1:23:00,  2.21s/it]

Episode 5751/8000, real env return = 301.30


 72%|█████████████████████████▏         | 5761/8000 [3:32:56<1:17:54,  2.09s/it]

Episode 5761/8000, real env return = 288.64


 72%|█████████████████████████▏         | 5771/8000 [3:33:16<1:17:28,  2.09s/it]

Episode 5771/8000, real env return = 293.25


 72%|█████████████████████████▎         | 5781/8000 [3:33:38<1:20:46,  2.18s/it]

Episode 5781/8000, real env return = 296.91


 72%|█████████████████████████▎         | 5791/8000 [3:33:58<1:17:52,  2.12s/it]

Episode 5791/8000, real env return = 295.14


 73%|█████████████████████████▍         | 5801/8000 [3:34:17<1:09:28,  1.90s/it]

Episode 5801/8000, real env return = 295.62


 73%|█████████████████████████▍         | 5811/8000 [3:34:38<1:15:26,  2.07s/it]

Episode 5811/8000, real env return = 301.01


 73%|█████████████████████████▍         | 5821/8000 [3:35:01<1:24:02,  2.31s/it]

Episode 5821/8000, real env return = 292.22


 73%|█████████████████████████▌         | 5831/8000 [3:35:23<1:25:08,  2.36s/it]

Episode 5831/8000, real env return = 298.01


 73%|█████████████████████████▌         | 5841/8000 [3:35:44<1:12:02,  2.00s/it]

Episode 5841/8000, real env return = 296.48


 73%|█████████████████████████▌         | 5851/8000 [3:36:07<1:18:48,  2.20s/it]

Episode 5851/8000, real env return = 300.87


 73%|█████████████████████████▋         | 5861/8000 [3:36:31<1:30:54,  2.55s/it]

Episode 5861/8000, real env return = 300.86


 73%|█████████████████████████▋         | 5871/8000 [3:36:52<1:20:54,  2.28s/it]

Episode 5871/8000, real env return = 297.92


 74%|█████████████████████████▋         | 5881/8000 [3:37:12<1:05:32,  1.86s/it]

Episode 5881/8000, real env return = 297.76


 74%|█████████████████████████▊         | 5891/8000 [3:37:33<1:07:53,  1.93s/it]

Episode 5891/8000, real env return = 297.03


 74%|█████████████████████████▊         | 5901/8000 [3:37:54<1:12:39,  2.08s/it]

Episode 5901/8000, real env return = 294.43


 74%|█████████████████████████▊         | 5911/8000 [3:38:14<1:12:15,  2.08s/it]

Episode 5911/8000, real env return = 293.02


 74%|█████████████████████████▉         | 5921/8000 [3:38:35<1:13:24,  2.12s/it]

Episode 5921/8000, real env return = 293.87


 74%|█████████████████████████▉         | 5931/8000 [3:38:57<1:17:05,  2.24s/it]

Episode 5931/8000, real env return = 295.48


 74%|█████████████████████████▉         | 5941/8000 [3:39:19<1:16:45,  2.24s/it]

Episode 5941/8000, real env return = 296.24


 74%|██████████████████████████         | 5951/8000 [3:39:42<1:15:44,  2.22s/it]

Episode 5951/8000, real env return = 297.33


 75%|██████████████████████████         | 5961/8000 [3:40:06<1:20:10,  2.36s/it]

Episode 5961/8000, real env return = 300.02


 75%|██████████████████████████         | 5971/8000 [3:40:27<1:13:23,  2.17s/it]

Episode 5971/8000, real env return = 295.31


 75%|██████████████████████████▏        | 5981/8000 [3:40:48<1:05:57,  1.96s/it]

Episode 5981/8000, real env return = 298.83


 75%|██████████████████████████▏        | 5991/8000 [3:41:12<1:23:30,  2.49s/it]

Episode 5991/8000, real env return = 298.98


 75%|██████████████████████████▎        | 6001/8000 [3:41:32<1:17:42,  2.33s/it]

Episode 6001/8000, real env return = 303.40


 75%|██████████████████████████▎        | 6011/8000 [3:41:54<1:19:06,  2.39s/it]

Episode 6011/8000, real env return = 302.60


 75%|██████████████████████████▎        | 6021/8000 [3:42:15<1:16:12,  2.31s/it]

Episode 6021/8000, real env return = 299.60


 75%|██████████████████████████▍        | 6031/8000 [3:42:36<1:06:20,  2.02s/it]

Episode 6031/8000, real env return = 300.29


 76%|██████████████████████████▍        | 6041/8000 [3:42:58<1:11:50,  2.20s/it]

Episode 6041/8000, real env return = 292.61


 76%|██████████████████████████▍        | 6051/8000 [3:43:20<1:14:02,  2.28s/it]

Episode 6051/8000, real env return = 285.60


 76%|██████████████████████████▌        | 6061/8000 [3:43:40<1:08:42,  2.13s/it]

Episode 6061/8000, real env return = 298.04


 76%|██████████████████████████▌        | 6071/8000 [3:44:02<1:11:34,  2.23s/it]

Episode 6071/8000, real env return = 300.36


 76%|██████████████████████████▌        | 6081/8000 [3:44:23<1:05:47,  2.06s/it]

Episode 6081/8000, real env return = 303.44


 76%|██████████████████████████▋        | 6091/8000 [3:44:46<1:09:30,  2.18s/it]

Episode 6091/8000, real env return = 298.68


 76%|██████████████████████████▋        | 6101/8000 [3:45:09<1:14:35,  2.36s/it]

Episode 6101/8000, real env return = 298.60


 76%|██████████████████████████▋        | 6111/8000 [3:45:30<1:09:10,  2.20s/it]

Episode 6111/8000, real env return = 305.28


 77%|██████████████████████████▊        | 6121/8000 [3:45:51<1:02:26,  1.99s/it]

Episode 6121/8000, real env return = 300.04


 77%|██████████████████████████▊        | 6131/8000 [3:46:10<1:04:13,  2.06s/it]

Episode 6131/8000, real env return = 297.62


 77%|██████████████████████████▊        | 6141/8000 [3:46:34<1:14:22,  2.40s/it]

Episode 6141/8000, real env return = 297.80


 77%|██████████████████████████▉        | 6151/8000 [3:46:56<1:10:28,  2.29s/it]

Episode 6151/8000, real env return = 298.08


 77%|██████████████████████████▉        | 6161/8000 [3:47:18<1:08:41,  2.24s/it]

Episode 6161/8000, real env return = 299.65


 77%|██████████████████████████▉        | 6171/8000 [3:47:38<1:05:51,  2.16s/it]

Episode 6171/8000, real env return = 299.65


 77%|███████████████████████████        | 6181/8000 [3:48:01<1:06:43,  2.20s/it]

Episode 6181/8000, real env return = 299.70


 77%|███████████████████████████        | 6191/8000 [3:48:22<1:03:20,  2.10s/it]

Episode 6191/8000, real env return = 299.98


 78%|███████████████████████████▏       | 6201/8000 [3:48:42<1:00:29,  2.02s/it]

Episode 6201/8000, real env return = 299.98


 78%|███████████████████████████▏       | 6211/8000 [3:49:04<1:04:09,  2.15s/it]

Episode 6211/8000, real env return = 301.80


 78%|███████████████████████████▏       | 6221/8000 [3:49:26<1:10:28,  2.38s/it]

Episode 6221/8000, real env return = 303.87


 78%|███████████████████████████▎       | 6231/8000 [3:49:49<1:12:45,  2.47s/it]

Episode 6231/8000, real env return = 293.47


 78%|████████████████████████████▊        | 6241/8000 [3:50:09<57:48,  1.97s/it]

Episode 6241/8000, real env return = 303.00


 78%|████████████████████████████▉        | 6251/8000 [3:50:31<56:23,  1.93s/it]

Episode 6251/8000, real env return = 298.85


 78%|███████████████████████████▍       | 6261/8000 [3:50:55<1:03:05,  2.18s/it]

Episode 6261/8000, real env return = 298.76


 78%|█████████████████████████████        | 6271/8000 [3:51:17<55:24,  1.92s/it]

Episode 6271/8000, real env return = 302.85


 79%|███████████████████████████▍       | 6281/8000 [3:51:41<1:14:04,  2.59s/it]

Episode 6281/8000, real env return = 300.94


 79%|███████████████████████████▌       | 6291/8000 [3:52:02<1:00:51,  2.14s/it]

Episode 6291/8000, real env return = 304.74


 79%|█████████████████████████████▏       | 6301/8000 [3:52:22<51:03,  1.80s/it]

Episode 6301/8000, real env return = 302.52


 79%|█████████████████████████████▏       | 6311/8000 [3:52:43<59:01,  2.10s/it]

Episode 6311/8000, real env return = 303.43


 79%|█████████████████████████████▏       | 6321/8000 [3:53:04<56:34,  2.02s/it]

Episode 6321/8000, real env return = 303.01


 79%|█████████████████████████████▎       | 6331/8000 [3:53:25<55:03,  1.98s/it]

Episode 6331/8000, real env return = 302.54


 79%|█████████████████████████████▎       | 6341/8000 [3:53:46<56:34,  2.05s/it]

Episode 6341/8000, real env return = 301.99


 79%|█████████████████████████████▎       | 6351/8000 [3:54:06<54:54,  2.00s/it]

Episode 6351/8000, real env return = 303.58


 80%|█████████████████████████████▍       | 6361/8000 [3:54:27<56:24,  2.07s/it]

Episode 6361/8000, real env return = 302.92


 80%|█████████████████████████████▍       | 6371/8000 [3:54:48<54:22,  2.00s/it]

Episode 6371/8000, real env return = 305.58


 80%|█████████████████████████████▌       | 6381/8000 [3:55:09<54:35,  2.02s/it]

Episode 6381/8000, real env return = 301.88


 80%|███████████████████████████▉       | 6391/8000 [3:55:33<1:04:16,  2.40s/it]

Episode 6391/8000, real env return = 298.84


 80%|█████████████████████████████▌       | 6401/8000 [3:55:56<59:49,  2.24s/it]

Episode 6401/8000, real env return = 301.46


 80%|█████████████████████████████▋       | 6411/8000 [3:56:19<58:36,  2.21s/it]

Episode 6411/8000, real env return = 299.17


 80%|█████████████████████████████▋       | 6421/8000 [3:56:39<54:27,  2.07s/it]

Episode 6421/8000, real env return = 303.56


 80%|█████████████████████████████▋       | 6431/8000 [3:57:02<57:59,  2.22s/it]

Episode 6431/8000, real env return = 303.64


 81%|█████████████████████████████▊       | 6441/8000 [3:57:25<59:28,  2.29s/it]

Episode 6441/8000, real env return = 303.72


 81%|█████████████████████████████▊       | 6451/8000 [3:57:49<59:50,  2.32s/it]

Episode 6451/8000, real env return = 304.34


 81%|█████████████████████████████▉       | 6461/8000 [3:58:10<51:01,  1.99s/it]

Episode 6461/8000, real env return = 6.12


 81%|█████████████████████████████▉       | 6471/8000 [3:58:33<58:27,  2.29s/it]

Episode 6471/8000, real env return = 299.34


 81%|█████████████████████████████▉       | 6481/8000 [3:58:52<47:19,  1.87s/it]

Episode 6481/8000, real env return = 297.10


 81%|██████████████████████████████       | 6491/8000 [3:59:13<56:15,  2.24s/it]

Episode 6491/8000, real env return = 295.66


 81%|██████████████████████████████       | 6501/8000 [3:59:34<56:44,  2.27s/it]

Episode 6501/8000, real env return = 299.62


 81%|██████████████████████████████       | 6511/8000 [3:59:54<52:04,  2.10s/it]

Episode 6511/8000, real env return = 293.87


 82%|██████████████████████████████▏      | 6521/8000 [4:00:15<52:27,  2.13s/it]

Episode 6521/8000, real env return = 300.72


 82%|██████████████████████████████▏      | 6531/8000 [4:00:36<50:33,  2.06s/it]

Episode 6531/8000, real env return = 300.76


 82%|██████████████████████████████▎      | 6541/8000 [4:00:57<51:54,  2.13s/it]

Episode 6541/8000, real env return = 298.69


 82%|██████████████████████████████▎      | 6551/8000 [4:01:22<57:53,  2.40s/it]

Episode 6551/8000, real env return = 300.12


 82%|████████████████████████████▋      | 6561/8000 [4:01:46<1:00:56,  2.54s/it]

Episode 6561/8000, real env return = 300.15


 82%|██████████████████████████████▍      | 6571/8000 [4:02:08<54:13,  2.28s/it]

Episode 6571/8000, real env return = 294.20


 82%|██████████████████████████████▍      | 6581/8000 [4:02:31<52:04,  2.20s/it]

Episode 6581/8000, real env return = 296.11


 82%|██████████████████████████████▍      | 6591/8000 [4:02:53<49:19,  2.10s/it]

Episode 6591/8000, real env return = 302.72


 83%|██████████████████████████████▌      | 6601/8000 [4:03:14<50:23,  2.16s/it]

Episode 6601/8000, real env return = 303.64


 83%|██████████████████████████████▌      | 6611/8000 [4:03:36<52:44,  2.28s/it]

Episode 6611/8000, real env return = 303.59


 83%|██████████████████████████████▌      | 6621/8000 [4:03:59<53:17,  2.32s/it]

Episode 6621/8000, real env return = 300.73


 83%|██████████████████████████████▋      | 6631/8000 [4:04:19<45:46,  2.01s/it]

Episode 6631/8000, real env return = 302.88


 83%|██████████████████████████████▋      | 6641/8000 [4:04:39<46:04,  2.03s/it]

Episode 6641/8000, real env return = 301.94


 83%|██████████████████████████████▊      | 6651/8000 [4:05:01<47:58,  2.13s/it]

Episode 6651/8000, real env return = 301.99


 83%|██████████████████████████████▊      | 6661/8000 [4:05:24<47:47,  2.14s/it]

Episode 6661/8000, real env return = 303.20


 83%|██████████████████████████████▊      | 6671/8000 [4:05:45<47:56,  2.16s/it]

Episode 6671/8000, real env return = 302.50


 84%|██████████████████████████████▉      | 6681/8000 [4:06:07<48:05,  2.19s/it]

Episode 6681/8000, real env return = 301.98


 84%|██████████████████████████████▉      | 6691/8000 [4:06:27<41:55,  1.92s/it]

Episode 6691/8000, real env return = 301.20


 84%|██████████████████████████████▉      | 6701/8000 [4:06:48<43:09,  1.99s/it]

Episode 6701/8000, real env return = 303.67


 84%|███████████████████████████████      | 6711/8000 [4:07:10<50:05,  2.33s/it]

Episode 6711/8000, real env return = 304.69


 84%|███████████████████████████████      | 6721/8000 [4:07:32<49:11,  2.31s/it]

Episode 6721/8000, real env return = 304.60


 84%|███████████████████████████████▏     | 6731/8000 [4:07:53<43:16,  2.05s/it]

Episode 6731/8000, real env return = 304.04


 84%|███████████████████████████████▏     | 6741/8000 [4:08:15<43:26,  2.07s/it]

Episode 6741/8000, real env return = 303.11


 84%|███████████████████████████████▏     | 6751/8000 [4:08:36<41:21,  1.99s/it]

Episode 6751/8000, real env return = 299.82


 85%|███████████████████████████████▎     | 6761/8000 [4:08:58<44:57,  2.18s/it]

Episode 6761/8000, real env return = 301.54


 85%|███████████████████████████████▎     | 6771/8000 [4:09:19<43:36,  2.13s/it]

Episode 6771/8000, real env return = 300.06


 85%|███████████████████████████████▎     | 6781/8000 [4:09:42<45:49,  2.26s/it]

Episode 6781/8000, real env return = 304.42


 85%|███████████████████████████████▍     | 6791/8000 [4:10:01<43:12,  2.14s/it]

Episode 6791/8000, real env return = 304.74


 85%|███████████████████████████████▍     | 6801/8000 [4:10:23<41:40,  2.09s/it]

Episode 6801/8000, real env return = 305.69


 85%|███████████████████████████████▌     | 6811/8000 [4:10:45<41:46,  2.11s/it]

Episode 6811/8000, real env return = 308.26


 85%|███████████████████████████████▌     | 6821/8000 [4:11:06<41:52,  2.13s/it]

Episode 6821/8000, real env return = 308.77


 85%|███████████████████████████████▌     | 6831/8000 [4:11:26<38:47,  1.99s/it]

Episode 6831/8000, real env return = 306.92


 86%|███████████████████████████████▋     | 6841/8000 [4:11:48<42:15,  2.19s/it]

Episode 6841/8000, real env return = 308.92


 86%|███████████████████████████████▋     | 6851/8000 [4:12:09<41:17,  2.16s/it]

Episode 6851/8000, real env return = 306.75


 86%|███████████████████████████████▋     | 6861/8000 [4:12:31<42:47,  2.25s/it]

Episode 6861/8000, real env return = 306.14


 86%|███████████████████████████████▊     | 6871/8000 [4:12:52<39:01,  2.07s/it]

Episode 6871/8000, real env return = 309.30


 86%|███████████████████████████████▊     | 6881/8000 [4:13:11<33:28,  1.79s/it]

Episode 6881/8000, real env return = 306.97


 86%|███████████████████████████████▊     | 6891/8000 [4:13:33<40:40,  2.20s/it]

Episode 6891/8000, real env return = 307.56


 86%|███████████████████████████████▉     | 6901/8000 [4:13:52<38:14,  2.09s/it]

Episode 6901/8000, real env return = 306.74


 86%|███████████████████████████████▉     | 6911/8000 [4:14:14<38:25,  2.12s/it]

Episode 6911/8000, real env return = 306.61


 87%|████████████████████████████████     | 6921/8000 [4:14:36<41:51,  2.33s/it]

Episode 6921/8000, real env return = 302.63


 87%|████████████████████████████████     | 6931/8000 [4:14:57<39:28,  2.22s/it]

Episode 6931/8000, real env return = 307.27


 87%|████████████████████████████████     | 6941/8000 [4:15:18<35:52,  2.03s/it]

Episode 6941/8000, real env return = 304.21


 87%|████████████████████████████████▏    | 6951/8000 [4:15:40<38:08,  2.18s/it]

Episode 6951/8000, real env return = 306.32


 87%|████████████████████████████████▏    | 6961/8000 [4:16:03<40:39,  2.35s/it]

Episode 6961/8000, real env return = 306.10


 87%|████████████████████████████████▏    | 6971/8000 [4:16:26<42:07,  2.46s/it]

Episode 6971/8000, real env return = 305.98


 87%|████████████████████████████████▎    | 6981/8000 [4:16:47<40:36,  2.39s/it]

Episode 6981/8000, real env return = 307.35


 87%|████████████████████████████████▎    | 6991/8000 [4:17:07<34:52,  2.07s/it]

Episode 6991/8000, real env return = 305.79


 88%|████████████████████████████████▍    | 7001/8000 [4:17:28<33:49,  2.03s/it]

Episode 7001/8000, real env return = 301.59


 88%|████████████████████████████████▍    | 7011/8000 [4:17:52<39:50,  2.42s/it]

Episode 7011/8000, real env return = 304.16


 88%|████████████████████████████████▍    | 7021/8000 [4:18:14<39:57,  2.45s/it]

Episode 7021/8000, real env return = 304.62


 88%|████████████████████████████████▌    | 7031/8000 [4:18:33<32:36,  2.02s/it]

Episode 7031/8000, real env return = 307.70


 88%|████████████████████████████████▌    | 7041/8000 [4:18:53<31:00,  1.94s/it]

Episode 7041/8000, real env return = 305.94


 88%|████████████████████████████████▌    | 7051/8000 [4:19:14<30:01,  1.90s/it]

Episode 7051/8000, real env return = 305.62


 88%|████████████████████████████████▋    | 7061/8000 [4:19:34<32:15,  2.06s/it]

Episode 7061/8000, real env return = 307.59


 88%|████████████████████████████████▋    | 7071/8000 [4:19:54<31:59,  2.07s/it]

Episode 7071/8000, real env return = 305.13


 89%|████████████████████████████████▋    | 7081/8000 [4:20:13<29:50,  1.95s/it]

Episode 7081/8000, real env return = 303.45


 89%|████████████████████████████████▊    | 7091/8000 [4:20:33<29:17,  1.93s/it]

Episode 7091/8000, real env return = 302.77


 89%|████████████████████████████████▊    | 7101/8000 [4:20:55<31:26,  2.10s/it]

Episode 7101/8000, real env return = 303.14


 89%|████████████████████████████████▉    | 7111/8000 [4:21:16<33:42,  2.27s/it]

Episode 7111/8000, real env return = 305.89


 89%|████████████████████████████████▉    | 7121/8000 [4:21:38<33:09,  2.26s/it]

Episode 7121/8000, real env return = 303.09


 89%|████████████████████████████████▉    | 7131/8000 [4:21:59<33:32,  2.32s/it]

Episode 7131/8000, real env return = 306.59


 89%|█████████████████████████████████    | 7141/8000 [4:22:21<31:58,  2.23s/it]

Episode 7141/8000, real env return = 302.76


 89%|█████████████████████████████████    | 7151/8000 [4:22:41<31:15,  2.21s/it]

Episode 7151/8000, real env return = 304.63


 90%|█████████████████████████████████    | 7161/8000 [4:23:02<30:36,  2.19s/it]

Episode 7161/8000, real env return = 188.89


 90%|█████████████████████████████████▏   | 7171/8000 [4:23:23<29:30,  2.14s/it]

Episode 7171/8000, real env return = 305.82


 90%|█████████████████████████████████▏   | 7181/8000 [4:23:46<29:43,  2.18s/it]

Episode 7181/8000, real env return = 305.30


 90%|█████████████████████████████████▎   | 7191/8000 [4:24:06<27:37,  2.05s/it]

Episode 7191/8000, real env return = 303.04


 90%|█████████████████████████████████▎   | 7201/8000 [4:24:27<29:53,  2.25s/it]

Episode 7201/8000, real env return = 303.01


 90%|█████████████████████████████████▎   | 7211/8000 [4:24:48<28:41,  2.18s/it]

Episode 7211/8000, real env return = 300.45


 90%|█████████████████████████████████▍   | 7221/8000 [4:25:11<30:44,  2.37s/it]

Episode 7221/8000, real env return = 302.03


 90%|█████████████████████████████████▍   | 7231/8000 [4:25:31<26:47,  2.09s/it]

Episode 7231/8000, real env return = 303.66


 91%|█████████████████████████████████▍   | 7241/8000 [4:25:52<27:26,  2.17s/it]

Episode 7241/8000, real env return = 302.25


 91%|█████████████████████████████████▌   | 7251/8000 [4:26:13<27:52,  2.23s/it]

Episode 7251/8000, real env return = 300.04


 91%|█████████████████████████████████▌   | 7261/8000 [4:26:36<27:25,  2.23s/it]

Episode 7261/8000, real env return = 300.01


 91%|█████████████████████████████████▋   | 7271/8000 [4:26:57<24:19,  2.00s/it]

Episode 7271/8000, real env return = 298.69


 91%|█████████████████████████████████▋   | 7281/8000 [4:27:19<25:42,  2.15s/it]

Episode 7281/8000, real env return = 301.40


 91%|█████████████████████████████████▋   | 7291/8000 [4:27:40<23:45,  2.01s/it]

Episode 7291/8000, real env return = 300.15


 91%|█████████████████████████████████▊   | 7301/8000 [4:28:01<25:24,  2.18s/it]

Episode 7301/8000, real env return = 301.70


 91%|█████████████████████████████████▊   | 7311/8000 [4:28:22<24:32,  2.14s/it]

Episode 7311/8000, real env return = 299.53


 92%|█████████████████████████████████▊   | 7321/8000 [4:28:43<26:28,  2.34s/it]

Episode 7321/8000, real env return = 301.91


 92%|█████████████████████████████████▉   | 7331/8000 [4:29:03<21:31,  1.93s/it]

Episode 7331/8000, real env return = 301.58


 92%|█████████████████████████████████▉   | 7341/8000 [4:29:26<23:37,  2.15s/it]

Episode 7341/8000, real env return = 302.08


 92%|█████████████████████████████████▉   | 7351/8000 [4:29:46<20:04,  1.86s/it]

Episode 7351/8000, real env return = 303.31


 92%|██████████████████████████████████   | 7361/8000 [4:30:09<23:33,  2.21s/it]

Episode 7361/8000, real env return = 299.59


 92%|██████████████████████████████████   | 7371/8000 [4:30:29<23:42,  2.26s/it]

Episode 7371/8000, real env return = 301.48


 92%|██████████████████████████████████▏  | 7381/8000 [4:30:51<23:33,  2.28s/it]

Episode 7381/8000, real env return = 300.55


 92%|██████████████████████████████████▏  | 7391/8000 [4:31:10<20:10,  1.99s/it]

Episode 7391/8000, real env return = 302.30


 93%|██████████████████████████████████▏  | 7401/8000 [4:31:31<20:26,  2.05s/it]

Episode 7401/8000, real env return = 300.85


 93%|██████████████████████████████████▎  | 7411/8000 [4:31:53<21:27,  2.19s/it]

Episode 7411/8000, real env return = 300.75


 93%|██████████████████████████████████▎  | 7421/8000 [4:32:13<19:24,  2.01s/it]

Episode 7421/8000, real env return = 301.57


 93%|██████████████████████████████████▎  | 7431/8000 [4:32:33<18:42,  1.97s/it]

Episode 7431/8000, real env return = 305.89


 93%|██████████████████████████████████▍  | 7441/8000 [4:32:54<21:54,  2.35s/it]

Episode 7441/8000, real env return = 298.31


 93%|██████████████████████████████████▍  | 7451/8000 [4:33:16<18:47,  2.05s/it]

Episode 7451/8000, real env return = 300.07


 93%|██████████████████████████████████▌  | 7461/8000 [4:33:38<20:06,  2.24s/it]

Episode 7461/8000, real env return = 305.54


 93%|██████████████████████████████████▌  | 7471/8000 [4:33:59<18:17,  2.08s/it]

Episode 7471/8000, real env return = 303.71


 94%|██████████████████████████████████▌  | 7481/8000 [4:34:18<17:34,  2.03s/it]

Episode 7481/8000, real env return = 305.03


 94%|██████████████████████████████████▋  | 7491/8000 [4:34:40<19:42,  2.32s/it]

Episode 7491/8000, real env return = 305.61


 94%|██████████████████████████████████▋  | 7501/8000 [4:35:01<16:42,  2.01s/it]

Episode 7501/8000, real env return = 303.07


 94%|██████████████████████████████████▋  | 7511/8000 [4:35:20<15:52,  1.95s/it]

Episode 7511/8000, real env return = 303.36


 94%|██████████████████████████████████▊  | 7521/8000 [4:35:41<17:40,  2.21s/it]

Episode 7521/8000, real env return = 305.19


 94%|██████████████████████████████████▊  | 7531/8000 [4:36:05<19:10,  2.45s/it]

Episode 7531/8000, real env return = 305.62


 94%|██████████████████████████████████▉  | 7541/8000 [4:36:28<18:57,  2.48s/it]

Episode 7541/8000, real env return = 303.88


 94%|██████████████████████████████████▉  | 7551/8000 [4:36:49<15:36,  2.09s/it]

Episode 7551/8000, real env return = 302.34


 95%|██████████████████████████████████▉  | 7561/8000 [4:37:09<15:32,  2.12s/it]

Episode 7561/8000, real env return = 304.61


 95%|███████████████████████████████████  | 7571/8000 [4:37:33<16:05,  2.25s/it]

Episode 7571/8000, real env return = 305.45


 95%|███████████████████████████████████  | 7581/8000 [4:37:54<15:45,  2.26s/it]

Episode 7581/8000, real env return = 304.15


 95%|███████████████████████████████████  | 7591/8000 [4:38:17<15:41,  2.30s/it]

Episode 7591/8000, real env return = 304.57


 95%|███████████████████████████████████▏ | 7601/8000 [4:38:39<15:29,  2.33s/it]

Episode 7601/8000, real env return = 306.24


 95%|███████████████████████████████████▏ | 7611/8000 [4:39:00<14:51,  2.29s/it]

Episode 7611/8000, real env return = 304.73


 95%|███████████████████████████████████▏ | 7621/8000 [4:39:20<12:48,  2.03s/it]

Episode 7621/8000, real env return = 305.99


 95%|███████████████████████████████████▎ | 7631/8000 [4:39:42<13:30,  2.20s/it]

Episode 7631/8000, real env return = 307.35


 96%|███████████████████████████████████▎ | 7641/8000 [4:40:01<11:56,  2.00s/it]

Episode 7641/8000, real env return = 307.64


 96%|███████████████████████████████████▍ | 7651/8000 [4:40:20<10:38,  1.83s/it]

Episode 7651/8000, real env return = 306.74


 96%|███████████████████████████████████▍ | 7661/8000 [4:40:41<11:26,  2.03s/it]

Episode 7661/8000, real env return = 306.46


 96%|███████████████████████████████████▍ | 7671/8000 [4:41:02<11:13,  2.05s/it]

Episode 7671/8000, real env return = 301.05


 96%|███████████████████████████████████▌ | 7681/8000 [4:41:23<10:36,  2.00s/it]

Episode 7681/8000, real env return = 306.31


 96%|███████████████████████████████████▌ | 7691/8000 [4:41:42<09:55,  1.93s/it]

Episode 7691/8000, real env return = 299.26


 96%|███████████████████████████████████▌ | 7701/8000 [4:42:03<10:10,  2.04s/it]

Episode 7701/8000, real env return = 304.06


 96%|███████████████████████████████████▋ | 7711/8000 [4:42:22<09:28,  1.97s/it]

Episode 7711/8000, real env return = 306.36


 97%|███████████████████████████████████▋ | 7721/8000 [4:42:42<09:52,  2.12s/it]

Episode 7721/8000, real env return = 302.39


 97%|███████████████████████████████████▊ | 7731/8000 [4:43:02<09:11,  2.05s/it]

Episode 7731/8000, real env return = 302.17


 97%|███████████████████████████████████▊ | 7741/8000 [4:43:25<10:21,  2.40s/it]

Episode 7741/8000, real env return = 301.98


 97%|███████████████████████████████████▊ | 7751/8000 [4:43:45<08:23,  2.02s/it]

Episode 7751/8000, real env return = 307.42


 97%|███████████████████████████████████▉ | 7761/8000 [4:44:06<08:36,  2.16s/it]

Episode 7761/8000, real env return = 305.40


 97%|███████████████████████████████████▉ | 7771/8000 [4:44:26<07:55,  2.07s/it]

Episode 7771/8000, real env return = 306.95


 97%|███████████████████████████████████▉ | 7781/8000 [4:44:47<07:43,  2.12s/it]

Episode 7781/8000, real env return = 301.42


 97%|████████████████████████████████████ | 7791/8000 [4:45:10<08:30,  2.44s/it]

Episode 7791/8000, real env return = 306.74


 98%|████████████████████████████████████ | 7801/8000 [4:45:31<07:03,  2.13s/it]

Episode 7801/8000, real env return = -9.43


 98%|████████████████████████████████████▏| 7811/8000 [4:45:52<06:33,  2.08s/it]

Episode 7811/8000, real env return = 307.97


 98%|████████████████████████████████████▏| 7821/8000 [4:46:13<05:43,  1.92s/it]

Episode 7821/8000, real env return = 308.44


 98%|████████████████████████████████████▏| 7831/8000 [4:46:33<05:40,  2.02s/it]

Episode 7831/8000, real env return = 305.96


 98%|████████████████████████████████████▎| 7841/8000 [4:46:54<05:31,  2.09s/it]

Episode 7841/8000, real env return = 306.69


 98%|████████████████████████████████████▎| 7851/8000 [4:47:15<05:15,  2.12s/it]

Episode 7851/8000, real env return = 306.19


 98%|████████████████████████████████████▎| 7861/8000 [4:47:36<05:09,  2.23s/it]

Episode 7861/8000, real env return = 309.12


 98%|████████████████████████████████████▍| 7871/8000 [4:47:56<04:06,  1.91s/it]

Episode 7871/8000, real env return = 305.93


 99%|████████████████████████████████████▍| 7881/8000 [4:48:17<04:17,  2.17s/it]

Episode 7881/8000, real env return = 307.45


 99%|████████████████████████████████████▍| 7891/8000 [4:48:39<03:58,  2.19s/it]

Episode 7891/8000, real env return = 304.02


 99%|████████████████████████████████████▌| 7901/8000 [4:49:00<03:53,  2.36s/it]

Episode 7901/8000, real env return = 300.56


 99%|████████████████████████████████████▌| 7911/8000 [4:49:20<02:40,  1.80s/it]

Episode 7911/8000, real env return = 306.36


 99%|████████████████████████████████████▋| 7921/8000 [4:49:40<02:34,  1.96s/it]

Episode 7921/8000, real env return = 306.86


 99%|████████████████████████████████████▋| 7931/8000 [4:50:00<02:17,  2.00s/it]

Episode 7931/8000, real env return = 306.43


 99%|████████████████████████████████████▋| 7941/8000 [4:50:22<02:07,  2.16s/it]

Episode 7941/8000, real env return = 304.58


 99%|████████████████████████████████████▊| 7951/8000 [4:50:42<01:50,  2.26s/it]

Episode 7951/8000, real env return = 304.76


100%|████████████████████████████████████▊| 7961/8000 [4:51:02<01:13,  1.89s/it]

Episode 7961/8000, real env return = 305.82


100%|████████████████████████████████████▊| 7971/8000 [4:51:23<00:58,  2.02s/it]

Episode 7971/8000, real env return = 305.35


100%|████████████████████████████████████▉| 7981/8000 [4:51:44<00:39,  2.10s/it]

Episode 7981/8000, real env return = 307.08


100%|████████████████████████████████████▉| 7991/8000 [4:52:07<00:20,  2.28s/it]

Episode 7991/8000, real env return = 307.41


100%|█████████████████████████████████████| 8000/8000 [4:52:25<00:00,  2.19s/it]


Training finished.


In [10]:
train2 = main()

Using cuda device


  0%|                                                  | 0/8000 [00:00<?, ?it/s]

Episode 1/8000, real env return = -104.06


  0%|                                       | 11/8000 [00:15<3:58:17,  1.79s/it]

Episode 11/8000, real env return = -84.88


  0%|                                       | 21/8000 [00:47<9:06:08,  4.11s/it]

Episode 21/8000, real env return = -115.90


  0%|▏                                      | 31/8000 [01:15<8:01:10,  3.62s/it]

Episode 31/8000, real env return = -124.40


  1%|▏                                      | 41/8000 [01:42<5:32:07,  2.50s/it]

Episode 41/8000, real env return = -122.65


  1%|▏                                      | 51/8000 [02:08<6:51:58,  3.11s/it]

Episode 51/8000, real env return = -107.65


  1%|▎                                      | 61/8000 [02:34<6:51:47,  3.11s/it]

Episode 61/8000, real env return = -157.61


  1%|▎                                      | 71/8000 [03:13<8:39:44,  3.93s/it]

Episode 71/8000, real env return = -97.32


  1%|▍                                      | 81/8000 [03:48<8:48:02,  4.00s/it]

Episode 81/8000, real env return = -201.74


  1%|▍                                      | 91/8000 [04:24<7:54:39,  3.60s/it]

Episode 91/8000, real env return = -212.55


  1%|▍                                    | 101/8000 [05:13<10:29:33,  4.78s/it]

Episode 101/8000, real env return = -116.64


  1%|▌                                    | 111/8000 [06:03<10:19:19,  4.71s/it]

Episode 111/8000, real env return = -111.85


  2%|▌                                     | 121/8000 [06:52<9:51:15,  4.50s/it]

Episode 121/8000, real env return = -79.54


  2%|▌                                     | 131/8000 [07:32<8:54:59,  4.08s/it]

Episode 131/8000, real env return = -113.74


  2%|▋                                    | 141/8000 [08:20<10:44:28,  4.92s/it]

Episode 141/8000, real env return = -208.70


  2%|▋                                     | 151/8000 [09:09<9:58:31,  4.58s/it]

Episode 151/8000, real env return = -109.89


  2%|▋                                    | 161/8000 [09:55<10:41:14,  4.91s/it]

Episode 161/8000, real env return = -129.58


  2%|▊                                    | 171/8000 [10:55<12:44:16,  5.86s/it]

Episode 171/8000, real env return = -103.48


  2%|▊                                    | 181/8000 [11:49<11:23:04,  5.24s/it]

Episode 181/8000, real env return = -100.78


  2%|▉                                    | 191/8000 [12:38<11:26:29,  5.27s/it]

Episode 191/8000, real env return = -141.60


  3%|▉                                    | 201/8000 [13:30<11:14:02,  5.19s/it]

Episode 201/8000, real env return = -121.17


  3%|▉                                    | 211/8000 [14:27<10:57:08,  5.06s/it]

Episode 211/8000, real env return = -127.27


  3%|█                                    | 221/8000 [15:28<13:03:21,  6.04s/it]

Episode 221/8000, real env return = -111.61


  3%|█                                    | 231/8000 [16:26<13:13:26,  6.13s/it]

Episode 231/8000, real env return = -113.30


  3%|█                                    | 241/8000 [17:29<14:05:41,  6.54s/it]

Episode 241/8000, real env return = -122.73


  3%|█▏                                   | 251/8000 [18:29<13:15:00,  6.16s/it]

Episode 251/8000, real env return = -105.43


  3%|█▏                                   | 261/8000 [19:38<14:00:12,  6.51s/it]

Episode 261/8000, real env return = -113.50


  3%|█▎                                   | 271/8000 [20:44<14:36:34,  6.80s/it]

Episode 271/8000, real env return = -107.36


  4%|█▎                                   | 281/8000 [21:46<13:27:37,  6.28s/it]

Episode 281/8000, real env return = -95.62


  4%|█▎                                   | 291/8000 [22:36<11:46:22,  5.50s/it]

Episode 291/8000, real env return = -106.53


  4%|█▍                                   | 301/8000 [23:45<15:30:08,  7.25s/it]

Episode 301/8000, real env return = -58.09


  4%|█▍                                   | 311/8000 [24:46<13:04:44,  6.12s/it]

Episode 311/8000, real env return = -60.38


  4%|█▍                                   | 321/8000 [25:55<15:25:42,  7.23s/it]

Episode 321/8000, real env return = -47.86


  4%|█▌                                   | 331/8000 [27:07<15:33:28,  7.30s/it]

Episode 331/8000, real env return = -72.04


  4%|█▌                                   | 341/8000 [28:13<14:54:04,  7.00s/it]

Episode 341/8000, real env return = -85.09


  4%|█▌                                   | 351/8000 [29:12<12:56:55,  6.09s/it]

Episode 351/8000, real env return = -94.68


  5%|█▋                                   | 361/8000 [30:23<14:49:32,  6.99s/it]

Episode 361/8000, real env return = -120.24


  5%|█▋                                   | 371/8000 [31:35<16:18:24,  7.69s/it]

Episode 371/8000, real env return = -93.95


  5%|█▊                                   | 381/8000 [32:37<12:42:30,  6.00s/it]

Episode 381/8000, real env return = -71.96


  5%|█▊                                   | 391/8000 [33:36<11:47:36,  5.58s/it]

Episode 391/8000, real env return = -117.15


  5%|█▊                                   | 401/8000 [34:39<12:48:08,  6.07s/it]

Episode 401/8000, real env return = -106.71


  5%|█▉                                   | 411/8000 [35:44<13:46:59,  6.54s/it]

Episode 411/8000, real env return = -163.72


  5%|█▉                                   | 421/8000 [36:43<12:46:16,  6.07s/it]

Episode 421/8000, real env return = -111.62


  5%|█▉                                   | 431/8000 [37:49<14:48:47,  7.05s/it]

Episode 431/8000, real env return = -67.39


  6%|██                                   | 441/8000 [38:58<15:41:35,  7.47s/it]

Episode 441/8000, real env return = -51.05


  6%|██                                   | 451/8000 [40:02<13:08:29,  6.27s/it]

Episode 451/8000, real env return = -124.14


  6%|██▏                                  | 461/8000 [40:58<12:24:22,  5.92s/it]

Episode 461/8000, real env return = -106.81


  6%|██▏                                  | 471/8000 [41:52<11:57:18,  5.72s/it]

Episode 471/8000, real env return = -104.12


  6%|██▏                                  | 481/8000 [42:44<10:57:22,  5.25s/it]

Episode 481/8000, real env return = -88.82


  6%|██▎                                  | 491/8000 [43:33<11:15:46,  5.40s/it]

Episode 491/8000, real env return = -102.97


  6%|██▎                                  | 501/8000 [44:27<12:23:32,  5.95s/it]

Episode 501/8000, real env return = -53.83


  6%|██▎                                  | 511/8000 [45:26<12:19:43,  5.93s/it]

Episode 511/8000, real env return = -65.57


  7%|██▍                                  | 521/8000 [46:40<14:30:41,  6.99s/it]

Episode 521/8000, real env return = -74.71


  7%|██▌                                   | 531/8000 [47:36<9:57:06,  4.80s/it]

Episode 531/8000, real env return = -101.73


  7%|██▌                                   | 541/8000 [48:24<8:28:19,  4.09s/it]

Episode 541/8000, real env return = -132.71


  7%|██▌                                  | 551/8000 [49:19<11:10:09,  5.40s/it]

Episode 551/8000, real env return = -125.08


  7%|██▌                                  | 561/8000 [50:14<11:14:21,  5.44s/it]

Episode 561/8000, real env return = -98.20


  7%|██▋                                   | 571/8000 [51:04<8:12:01,  3.97s/it]

Episode 571/8000, real env return = -97.76


  7%|██▊                                   | 581/8000 [51:54<9:14:30,  4.48s/it]

Episode 581/8000, real env return = -97.69


  7%|██▊                                   | 591/8000 [52:44<8:55:03,  4.33s/it]

Episode 591/8000, real env return = -98.51


  8%|██▊                                   | 601/8000 [53:28<8:39:44,  4.21s/it]

Episode 601/8000, real env return = -99.13


  8%|██▉                                   | 611/8000 [54:09<8:27:46,  4.12s/it]

Episode 611/8000, real env return = -100.51


  8%|██▉                                   | 621/8000 [54:55<9:30:15,  4.64s/it]

Episode 621/8000, real env return = -102.70


  8%|██▉                                   | 631/8000 [55:37<7:33:31,  3.69s/it]

Episode 631/8000, real env return = -99.93


  8%|██▉                                  | 641/8000 [56:25<10:08:34,  4.96s/it]

Episode 641/8000, real env return = -103.50


  8%|███                                  | 651/8000 [57:10<10:03:48,  4.93s/it]

Episode 651/8000, real env return = -121.20


  8%|███▏                                  | 661/8000 [57:55<7:56:34,  3.90s/it]

Episode 661/8000, real env return = -112.15


  8%|███▏                                  | 671/8000 [58:41<8:57:14,  4.40s/it]

Episode 671/8000, real env return = -111.70


  9%|███▏                                  | 681/8000 [59:25<9:14:50,  4.55s/it]

Episode 681/8000, real env return = -107.64


  9%|███                                 | 691/8000 [1:00:02<7:08:13,  3.52s/it]

Episode 691/8000, real env return = -108.75


  9%|███▏                                | 701/8000 [1:00:44<8:43:58,  4.31s/it]

Episode 701/8000, real env return = -112.40


  9%|███                                | 711/8000 [1:01:26<10:08:42,  5.01s/it]

Episode 711/8000, real env return = -113.23


  9%|███▏                                | 721/8000 [1:02:09<7:54:03,  3.91s/it]

Episode 721/8000, real env return = -112.72


  9%|███▎                                | 731/8000 [1:02:43<6:58:36,  3.46s/it]

Episode 731/8000, real env return = -111.04


  9%|███▎                                | 741/8000 [1:03:26<9:41:31,  4.81s/it]

Episode 741/8000, real env return = -109.01


  9%|███▍                                | 751/8000 [1:04:02<6:31:34,  3.24s/it]

Episode 751/8000, real env return = -113.90


 10%|███▍                                | 761/8000 [1:04:42<7:05:26,  3.53s/it]

Episode 761/8000, real env return = -112.83


 10%|███▍                                | 771/8000 [1:05:17<5:44:30,  2.86s/it]

Episode 771/8000, real env return = -114.06


 10%|███▌                                | 781/8000 [1:05:57<8:19:46,  4.15s/it]

Episode 781/8000, real env return = -112.84


 10%|███▌                                | 791/8000 [1:06:34<6:56:07,  3.46s/it]

Episode 791/8000, real env return = -107.29


 10%|███▌                                | 801/8000 [1:07:10<7:52:47,  3.94s/it]

Episode 801/8000, real env return = -109.02


 10%|███▋                                | 811/8000 [1:07:47<5:43:25,  2.87s/it]

Episode 811/8000, real env return = -107.39


 10%|███▋                                | 821/8000 [1:08:28<8:13:57,  4.13s/it]

Episode 821/8000, real env return = -113.23


 10%|███▋                                | 831/8000 [1:09:09<8:08:33,  4.09s/it]

Episode 831/8000, real env return = -108.09


 11%|███▊                                | 841/8000 [1:09:48<7:01:55,  3.54s/it]

Episode 841/8000, real env return = -108.66


 11%|███▊                                | 851/8000 [1:10:24<7:08:26,  3.60s/it]

Episode 851/8000, real env return = -113.13


 11%|███▊                                | 861/8000 [1:11:06<9:20:50,  4.71s/it]

Episode 861/8000, real env return = -108.45


 11%|███▉                                | 871/8000 [1:11:42<9:12:36,  4.65s/it]

Episode 871/8000, real env return = -107.36


 11%|███▉                                | 881/8000 [1:12:19<8:16:02,  4.18s/it]

Episode 881/8000, real env return = -109.28


 11%|████                                | 891/8000 [1:12:52<5:28:00,  2.77s/it]

Episode 891/8000, real env return = -106.86


 11%|████                                | 901/8000 [1:13:22<6:29:55,  3.30s/it]

Episode 901/8000, real env return = -107.01


 11%|████                                | 911/8000 [1:13:54<7:20:52,  3.73s/it]

Episode 911/8000, real env return = -107.35


 12%|████▏                               | 921/8000 [1:14:28<6:25:05,  3.26s/it]

Episode 921/8000, real env return = -107.45


 12%|████▏                               | 931/8000 [1:14:57<6:30:14,  3.31s/it]

Episode 931/8000, real env return = -106.77


 12%|████▏                               | 941/8000 [1:15:30<5:38:56,  2.88s/it]

Episode 941/8000, real env return = -106.39


 12%|████▎                               | 951/8000 [1:15:56<5:30:09,  2.81s/it]

Episode 951/8000, real env return = -108.49


 12%|████▎                               | 961/8000 [1:16:25<5:56:13,  3.04s/it]

Episode 961/8000, real env return = -108.13


 12%|████▎                               | 971/8000 [1:16:55<6:54:42,  3.54s/it]

Episode 971/8000, real env return = -107.34


 12%|████▍                               | 981/8000 [1:17:26<4:57:27,  2.54s/it]

Episode 981/8000, real env return = -108.22


 12%|████▍                               | 991/8000 [1:17:57<6:54:58,  3.55s/it]

Episode 991/8000, real env return = -106.77


 13%|████▍                              | 1001/8000 [1:18:26<5:51:30,  3.01s/it]

Episode 1001/8000, real env return = -106.61


 13%|████▍                              | 1011/8000 [1:18:52<4:44:26,  2.44s/it]

Episode 1011/8000, real env return = -107.24


 13%|████▍                              | 1021/8000 [1:19:22<5:14:10,  2.70s/it]

Episode 1021/8000, real env return = -106.46


 13%|████▌                              | 1031/8000 [1:19:56<6:47:47,  3.51s/it]

Episode 1031/8000, real env return = -107.50


 13%|████▌                              | 1041/8000 [1:20:21<4:00:47,  2.08s/it]

Episode 1041/8000, real env return = -108.36


 13%|████▌                              | 1051/8000 [1:20:45<5:06:52,  2.65s/it]

Episode 1051/8000, real env return = -106.60


 13%|████▋                              | 1061/8000 [1:21:17<5:02:58,  2.62s/it]

Episode 1061/8000, real env return = -107.13


 13%|████▋                              | 1071/8000 [1:21:48<5:41:09,  2.95s/it]

Episode 1071/8000, real env return = -100.61


 14%|████▋                              | 1081/8000 [1:22:16<3:41:47,  1.92s/it]

Episode 1081/8000, real env return = -102.66


 14%|████▊                              | 1091/8000 [1:22:50<7:31:56,  3.92s/it]

Episode 1091/8000, real env return = -106.54


 14%|████▊                              | 1101/8000 [1:23:22<6:31:15,  3.40s/it]

Episode 1101/8000, real env return = -107.00


 14%|████▊                              | 1111/8000 [1:23:51<6:47:32,  3.55s/it]

Episode 1111/8000, real env return = -109.77


 14%|████▉                              | 1121/8000 [1:24:16<4:15:19,  2.23s/it]

Episode 1121/8000, real env return = -106.74


 14%|████▉                              | 1131/8000 [1:24:53<7:09:47,  3.75s/it]

Episode 1131/8000, real env return = -115.24


 14%|████▉                              | 1141/8000 [1:25:23<6:30:19,  3.41s/it]

Episode 1141/8000, real env return = -106.76


 14%|█████                              | 1151/8000 [1:25:48<4:49:23,  2.54s/it]

Episode 1151/8000, real env return = -109.12


 15%|█████                              | 1161/8000 [1:26:34<8:35:57,  4.53s/it]

Episode 1161/8000, real env return = -109.78


 15%|█████                              | 1171/8000 [1:27:06<7:00:55,  3.70s/it]

Episode 1171/8000, real env return = -114.30


 15%|█████▏                             | 1181/8000 [1:27:34<4:51:59,  2.57s/it]

Episode 1181/8000, real env return = -106.73


 15%|█████▏                             | 1191/8000 [1:28:08<5:13:43,  2.76s/it]

Episode 1191/8000, real env return = -108.07


 15%|█████▎                             | 1201/8000 [1:28:38<5:39:28,  3.00s/it]

Episode 1201/8000, real env return = -117.13


 15%|█████▎                             | 1211/8000 [1:29:13<8:29:00,  4.50s/it]

Episode 1211/8000, real env return = -107.84


 15%|█████▎                             | 1221/8000 [1:29:42<6:16:04,  3.33s/it]

Episode 1221/8000, real env return = -106.98


 15%|█████▍                             | 1231/8000 [1:30:07<4:40:22,  2.49s/it]

Episode 1231/8000, real env return = -107.06


 16%|█████▍                             | 1241/8000 [1:30:41<6:08:59,  3.28s/it]

Episode 1241/8000, real env return = -106.74


 16%|█████▍                             | 1251/8000 [1:31:09<5:45:48,  3.07s/it]

Episode 1251/8000, real env return = -114.63


 16%|█████▌                             | 1261/8000 [1:31:35<5:15:44,  2.81s/it]

Episode 1261/8000, real env return = -106.66


 16%|█████▌                             | 1271/8000 [1:32:08<7:29:23,  4.01s/it]

Episode 1271/8000, real env return = -106.55


 16%|█████▌                             | 1281/8000 [1:32:31<4:04:22,  2.18s/it]

Episode 1281/8000, real env return = -106.73


 16%|█████▋                             | 1291/8000 [1:32:58<4:45:03,  2.55s/it]

Episode 1291/8000, real env return = -106.74


 16%|█████▋                             | 1301/8000 [1:33:24<3:57:52,  2.13s/it]

Episode 1301/8000, real env return = -106.94


 16%|█████▋                             | 1311/8000 [1:33:50<4:03:24,  2.18s/it]

Episode 1311/8000, real env return = -106.43


 17%|█████▊                             | 1321/8000 [1:34:22<4:27:47,  2.41s/it]

Episode 1321/8000, real env return = -106.91


 17%|█████▊                             | 1331/8000 [1:34:40<4:16:18,  2.31s/it]

Episode 1331/8000, real env return = -99.84


 17%|█████▊                             | 1341/8000 [1:35:02<3:54:33,  2.11s/it]

Episode 1341/8000, real env return = -101.11


 17%|█████▉                             | 1351/8000 [1:35:22<3:24:26,  1.84s/it]

Episode 1351/8000, real env return = -107.19


 17%|█████▉                             | 1361/8000 [1:35:43<3:51:58,  2.10s/it]

Episode 1361/8000, real env return = -108.90


 17%|█████▉                             | 1371/8000 [1:36:09<3:47:23,  2.06s/it]

Episode 1371/8000, real env return = -106.82


 17%|██████                             | 1381/8000 [1:36:38<6:23:19,  3.47s/it]

Episode 1381/8000, real env return = -107.51


 17%|██████                             | 1391/8000 [1:36:58<2:42:23,  1.47s/it]

Episode 1391/8000, real env return = -106.71


 18%|██████▏                            | 1401/8000 [1:37:20<3:41:35,  2.01s/it]

Episode 1401/8000, real env return = -107.28


 18%|██████▏                            | 1411/8000 [1:37:47<4:06:12,  2.24s/it]

Episode 1411/8000, real env return = -107.50


 18%|██████▏                            | 1421/8000 [1:38:10<3:42:11,  2.03s/it]

Episode 1421/8000, real env return = -107.86


 18%|██████▎                            | 1431/8000 [1:38:36<4:23:58,  2.41s/it]

Episode 1431/8000, real env return = -105.29


 18%|██████▎                            | 1441/8000 [1:38:53<2:56:01,  1.61s/it]

Episode 1441/8000, real env return = -102.07


 18%|██████▎                            | 1451/8000 [1:39:15<3:48:16,  2.09s/it]

Episode 1451/8000, real env return = -122.11


 18%|██████▍                            | 1461/8000 [1:39:40<4:40:06,  2.57s/it]

Episode 1461/8000, real env return = -106.84


 18%|██████▍                            | 1471/8000 [1:40:06<3:57:40,  2.18s/it]

Episode 1471/8000, real env return = -107.09


 19%|██████▍                            | 1481/8000 [1:40:36<5:31:39,  3.05s/it]

Episode 1481/8000, real env return = -108.75


 19%|██████▌                            | 1491/8000 [1:41:04<4:04:00,  2.25s/it]

Episode 1491/8000, real env return = -106.62


 19%|██████▌                            | 1501/8000 [1:41:24<4:15:20,  2.36s/it]

Episode 1501/8000, real env return = -98.95


 19%|██████▌                            | 1511/8000 [1:41:48<5:19:06,  2.95s/it]

Episode 1511/8000, real env return = -100.65


 19%|██████▋                            | 1521/8000 [1:42:17<5:21:38,  2.98s/it]

Episode 1521/8000, real env return = -47.87


 19%|██████▋                            | 1531/8000 [1:42:45<5:05:53,  2.84s/it]

Episode 1531/8000, real env return = -100.36


 19%|██████▋                            | 1541/8000 [1:43:09<4:23:50,  2.45s/it]

Episode 1541/8000, real env return = -107.12


 19%|██████▊                            | 1551/8000 [1:43:39<6:31:37,  3.64s/it]

Episode 1551/8000, real env return = -107.19


 20%|██████▊                            | 1561/8000 [1:44:03<4:23:37,  2.46s/it]

Episode 1561/8000, real env return = -108.02


 20%|██████▊                            | 1571/8000 [1:44:28<4:02:56,  2.27s/it]

Episode 1571/8000, real env return = -107.07


 20%|██████▉                            | 1581/8000 [1:44:55<4:29:11,  2.52s/it]

Episode 1581/8000, real env return = -107.05


 20%|██████▉                            | 1591/8000 [1:45:15<4:12:42,  2.37s/it]

Episode 1591/8000, real env return = -107.19


 20%|███████                            | 1601/8000 [1:45:42<5:08:42,  2.89s/it]

Episode 1601/8000, real env return = -106.70


 20%|███████                            | 1611/8000 [1:46:08<5:48:20,  3.27s/it]

Episode 1611/8000, real env return = -106.65


 20%|███████                            | 1621/8000 [1:46:33<4:12:32,  2.38s/it]

Episode 1621/8000, real env return = -106.60


 20%|███████▏                           | 1631/8000 [1:47:21<7:07:04,  4.02s/it]

Episode 1631/8000, real env return = -58.04


 21%|███████▏                           | 1641/8000 [1:47:54<6:02:42,  3.42s/it]

Episode 1641/8000, real env return = -65.59


 21%|███████▏                           | 1651/8000 [1:48:25<5:00:06,  2.84s/it]

Episode 1651/8000, real env return = -45.17


 21%|███████▎                           | 1661/8000 [1:48:58<5:26:25,  3.09s/it]

Episode 1661/8000, real env return = -101.83


 21%|███████▎                           | 1671/8000 [1:49:13<3:04:23,  1.75s/it]

Episode 1671/8000, real env return = -106.88


 21%|███████▎                           | 1681/8000 [1:49:42<4:21:34,  2.48s/it]

Episode 1681/8000, real env return = -107.57


 21%|███████▍                           | 1691/8000 [1:50:07<3:39:38,  2.09s/it]

Episode 1691/8000, real env return = -109.15


 21%|███████▍                           | 1701/8000 [1:50:38<5:42:56,  3.27s/it]

Episode 1701/8000, real env return = -110.26


 21%|███████▍                           | 1711/8000 [1:50:57<3:46:51,  2.16s/it]

Episode 1711/8000, real env return = -107.35


 22%|███████▌                           | 1721/8000 [1:51:22<3:36:00,  2.06s/it]

Episode 1721/8000, real env return = -108.35


 22%|███████▌                           | 1731/8000 [1:51:46<3:45:07,  2.15s/it]

Episode 1731/8000, real env return = -99.49


 22%|███████▌                           | 1741/8000 [1:52:15<4:36:58,  2.66s/it]

Episode 1741/8000, real env return = -109.68


 22%|███████▋                           | 1751/8000 [1:52:36<4:37:01,  2.66s/it]

Episode 1751/8000, real env return = -106.95


 22%|███████▋                           | 1761/8000 [1:52:55<2:46:09,  1.60s/it]

Episode 1761/8000, real env return = -106.54


 22%|███████▋                           | 1771/8000 [1:53:24<3:40:43,  2.13s/it]

Episode 1771/8000, real env return = -113.00


 22%|███████▊                           | 1781/8000 [1:53:42<3:50:51,  2.23s/it]

Episode 1781/8000, real env return = -106.76


 22%|███████▊                           | 1791/8000 [1:54:06<3:54:50,  2.27s/it]

Episode 1791/8000, real env return = -106.44


 23%|███████▉                           | 1801/8000 [1:54:33<5:28:20,  3.18s/it]

Episode 1801/8000, real env return = -106.42


 23%|███████▉                           | 1811/8000 [1:54:56<4:34:01,  2.66s/it]

Episode 1811/8000, real env return = -111.37


 23%|███████▉                           | 1821/8000 [1:55:27<4:09:47,  2.43s/it]

Episode 1821/8000, real env return = -105.64


 23%|████████                           | 1831/8000 [1:55:51<3:25:18,  2.00s/it]

Episode 1831/8000, real env return = -106.97


 23%|████████                           | 1841/8000 [1:56:13<3:20:56,  1.96s/it]

Episode 1841/8000, real env return = -108.85


 23%|████████                           | 1851/8000 [1:56:34<2:51:49,  1.68s/it]

Episode 1851/8000, real env return = -107.52


 23%|████████▏                          | 1861/8000 [1:57:01<4:41:23,  2.75s/it]

Episode 1861/8000, real env return = -95.05


 23%|████████▏                          | 1871/8000 [1:57:41<6:19:56,  3.72s/it]

Episode 1871/8000, real env return = -65.32


 24%|████████▏                          | 1881/8000 [1:58:15<5:29:09,  3.23s/it]

Episode 1881/8000, real env return = -57.07


 24%|████████▎                          | 1891/8000 [1:58:41<5:48:58,  3.43s/it]

Episode 1891/8000, real env return = -54.02


 24%|████████▎                          | 1901/8000 [1:59:20<5:48:57,  3.43s/it]

Episode 1901/8000, real env return = -58.04


 24%|████████▎                          | 1911/8000 [2:00:02<8:30:17,  5.03s/it]

Episode 1911/8000, real env return = -82.70


 24%|████████▍                          | 1921/8000 [2:00:30<3:50:52,  2.28s/it]

Episode 1921/8000, real env return = -132.78


 24%|████████▍                          | 1931/8000 [2:00:44<2:29:48,  1.48s/it]

Episode 1931/8000, real env return = -79.71


 24%|████████▍                          | 1941/8000 [2:01:03<2:58:01,  1.76s/it]

Episode 1941/8000, real env return = -74.68


 24%|████████▌                          | 1951/8000 [2:01:19<2:48:14,  1.67s/it]

Episode 1951/8000, real env return = -54.04


 25%|████████▌                          | 1961/8000 [2:01:36<2:27:28,  1.47s/it]

Episode 1961/8000, real env return = -74.23


 25%|████████▌                          | 1971/8000 [2:01:52<2:43:01,  1.62s/it]

Episode 1971/8000, real env return = -77.88


 25%|████████▋                          | 1981/8000 [2:02:09<2:53:19,  1.73s/it]

Episode 1981/8000, real env return = -52.37


 25%|████████▋                          | 1991/8000 [2:02:27<2:50:23,  1.70s/it]

Episode 1991/8000, real env return = -66.24


 25%|████████▊                          | 2001/8000 [2:02:44<2:33:23,  1.53s/it]

Episode 2001/8000, real env return = -58.66


 25%|████████▊                          | 2011/8000 [2:02:58<2:01:53,  1.22s/it]

Episode 2011/8000, real env return = -66.93


 25%|████████▊                          | 2021/8000 [2:03:14<3:00:50,  1.81s/it]

Episode 2021/8000, real env return = -51.17


 25%|████████▉                          | 2031/8000 [2:03:32<3:25:25,  2.06s/it]

Episode 2031/8000, real env return = -70.03


 26%|████████▉                          | 2041/8000 [2:03:51<3:18:13,  2.00s/it]

Episode 2041/8000, real env return = -61.72


 26%|████████▉                          | 2051/8000 [2:04:03<2:14:33,  1.36s/it]

Episode 2051/8000, real env return = -106.26


 26%|█████████                          | 2061/8000 [2:04:18<2:48:15,  1.70s/it]

Episode 2061/8000, real env return = -90.84


 26%|█████████                          | 2071/8000 [2:04:37<3:01:15,  1.83s/it]

Episode 2071/8000, real env return = -69.13


 26%|█████████                          | 2081/8000 [2:04:54<2:29:43,  1.52s/it]

Episode 2081/8000, real env return = -79.86


 26%|█████████▏                         | 2091/8000 [2:05:09<2:32:25,  1.55s/it]

Episode 2091/8000, real env return = -93.02


 26%|█████████▏                         | 2101/8000 [2:05:29<3:30:07,  2.14s/it]

Episode 2101/8000, real env return = -62.00


 26%|█████████▏                         | 2111/8000 [2:05:47<3:00:21,  1.84s/it]

Episode 2111/8000, real env return = -104.26


 27%|█████████▎                         | 2121/8000 [2:06:05<2:56:03,  1.80s/it]

Episode 2121/8000, real env return = -72.43


 27%|█████████▎                         | 2131/8000 [2:06:18<2:01:12,  1.24s/it]

Episode 2131/8000, real env return = -125.47


 27%|█████████▎                         | 2141/8000 [2:06:34<3:03:57,  1.88s/it]

Episode 2141/8000, real env return = -83.68


 27%|█████████▍                         | 2151/8000 [2:06:50<2:39:10,  1.63s/it]

Episode 2151/8000, real env return = -61.55


 27%|█████████▍                         | 2161/8000 [2:07:06<2:40:20,  1.65s/it]

Episode 2161/8000, real env return = -91.99


 27%|█████████▍                         | 2171/8000 [2:07:20<2:11:51,  1.36s/it]

Episode 2171/8000, real env return = -106.42


 27%|█████████▌                         | 2181/8000 [2:07:39<3:11:09,  1.97s/it]

Episode 2181/8000, real env return = -73.15


 27%|█████████▌                         | 2191/8000 [2:07:55<2:40:20,  1.66s/it]

Episode 2191/8000, real env return = -82.29


 28%|█████████▋                         | 2201/8000 [2:08:14<3:26:22,  2.14s/it]

Episode 2201/8000, real env return = -86.46


 28%|█████████▋                         | 2211/8000 [2:08:32<2:57:52,  1.84s/it]

Episode 2211/8000, real env return = -100.93


 28%|█████████▋                         | 2221/8000 [2:08:51<3:08:23,  1.96s/it]

Episode 2221/8000, real env return = -82.96


 28%|█████████▊                         | 2231/8000 [2:09:08<3:04:47,  1.92s/it]

Episode 2231/8000, real env return = -78.26


 28%|█████████▊                         | 2241/8000 [2:09:29<3:20:33,  2.09s/it]

Episode 2241/8000, real env return = -66.51


 28%|█████████▊                         | 2251/8000 [2:09:44<2:22:09,  1.48s/it]

Episode 2251/8000, real env return = -118.88


 28%|█████████▉                         | 2261/8000 [2:10:02<2:29:27,  1.56s/it]

Episode 2261/8000, real env return = -75.84


 28%|█████████▉                         | 2271/8000 [2:10:21<3:22:01,  2.12s/it]

Episode 2271/8000, real env return = -77.23


 29%|█████████▉                         | 2281/8000 [2:10:39<2:57:01,  1.86s/it]

Episode 2281/8000, real env return = -74.56


 29%|██████████                         | 2291/8000 [2:10:57<2:52:43,  1.82s/it]

Episode 2291/8000, real env return = -49.61


 29%|██████████                         | 2301/8000 [2:11:16<3:14:42,  2.05s/it]

Episode 2301/8000, real env return = -65.05


 29%|██████████                         | 2311/8000 [2:11:36<2:49:50,  1.79s/it]

Episode 2311/8000, real env return = -50.55


 29%|██████████▏                        | 2321/8000 [2:11:58<3:33:16,  2.25s/it]

Episode 2321/8000, real env return = -49.54


 29%|██████████▏                        | 2331/8000 [2:12:12<2:19:48,  1.48s/it]

Episode 2331/8000, real env return = -101.11


 29%|██████████▏                        | 2341/8000 [2:12:29<3:08:02,  1.99s/it]

Episode 2341/8000, real env return = -68.19


 29%|██████████▎                        | 2351/8000 [2:12:46<2:48:48,  1.79s/it]

Episode 2351/8000, real env return = -46.50


 30%|██████████▎                        | 2361/8000 [2:13:04<2:59:03,  1.91s/it]

Episode 2361/8000, real env return = -60.78


 30%|██████████▎                        | 2371/8000 [2:13:23<3:21:19,  2.15s/it]

Episode 2371/8000, real env return = -40.23


 30%|██████████▍                        | 2381/8000 [2:13:42<3:10:08,  2.03s/it]

Episode 2381/8000, real env return = -52.98


 30%|██████████▍                        | 2391/8000 [2:14:00<2:48:48,  1.81s/it]

Episode 2391/8000, real env return = -47.31


 30%|██████████▌                        | 2401/8000 [2:14:20<2:52:58,  1.85s/it]

Episode 2401/8000, real env return = -41.98


 30%|██████████▌                        | 2411/8000 [2:14:40<3:07:07,  2.01s/it]

Episode 2411/8000, real env return = -59.14


 30%|██████████▌                        | 2421/8000 [2:14:56<2:36:46,  1.69s/it]

Episode 2421/8000, real env return = -116.43


 30%|██████████▋                        | 2431/8000 [2:15:15<3:09:01,  2.04s/it]

Episode 2431/8000, real env return = -47.11


 31%|██████████▋                        | 2441/8000 [2:15:37<3:17:16,  2.13s/it]

Episode 2441/8000, real env return = -58.06


 31%|██████████▋                        | 2451/8000 [2:15:55<2:23:12,  1.55s/it]

Episode 2451/8000, real env return = -52.38


 31%|██████████▊                        | 2461/8000 [2:16:15<2:57:28,  1.92s/it]

Episode 2461/8000, real env return = -47.12


 31%|██████████▊                        | 2471/8000 [2:16:33<3:11:07,  2.07s/it]

Episode 2471/8000, real env return = -53.62


 31%|██████████▊                        | 2481/8000 [2:16:54<2:44:20,  1.79s/it]

Episode 2481/8000, real env return = -57.89


 31%|██████████▉                        | 2491/8000 [2:17:12<2:59:36,  1.96s/it]

Episode 2491/8000, real env return = -58.13


 31%|██████████▉                        | 2501/8000 [2:17:32<2:54:42,  1.91s/it]

Episode 2501/8000, real env return = -58.21


 31%|██████████▉                        | 2511/8000 [2:17:51<3:14:55,  2.13s/it]

Episode 2511/8000, real env return = -55.92


 32%|███████████                        | 2521/8000 [2:18:11<3:13:57,  2.12s/it]

Episode 2521/8000, real env return = -47.19


 32%|███████████                        | 2531/8000 [2:18:34<3:45:49,  2.48s/it]

Episode 2531/8000, real env return = -41.11


 32%|███████████                        | 2541/8000 [2:18:54<3:20:07,  2.20s/it]

Episode 2541/8000, real env return = -95.06


 32%|███████████▏                       | 2551/8000 [2:19:14<2:36:13,  1.72s/it]

Episode 2551/8000, real env return = -111.78


 32%|███████████▏                       | 2561/8000 [2:19:34<3:09:57,  2.10s/it]

Episode 2561/8000, real env return = -61.80


 32%|███████████▏                       | 2571/8000 [2:19:54<3:08:53,  2.09s/it]

Episode 2571/8000, real env return = -56.66


 32%|███████████▎                       | 2581/8000 [2:20:15<3:09:58,  2.10s/it]

Episode 2581/8000, real env return = -52.94


 32%|███████████▎                       | 2591/8000 [2:20:35<3:11:49,  2.13s/it]

Episode 2591/8000, real env return = -40.61


 33%|███████████▍                       | 2601/8000 [2:20:56<3:02:54,  2.03s/it]

Episode 2601/8000, real env return = -53.08


 33%|███████████▍                       | 2611/8000 [2:21:17<2:57:19,  1.97s/it]

Episode 2611/8000, real env return = -53.61


 33%|███████████▍                       | 2621/8000 [2:21:37<2:48:42,  1.88s/it]

Episode 2621/8000, real env return = -51.25


 33%|███████████▌                       | 2631/8000 [2:21:59<3:20:58,  2.25s/it]

Episode 2631/8000, real env return = -62.15


 33%|███████████▌                       | 2641/8000 [2:22:16<2:55:35,  1.97s/it]

Episode 2641/8000, real env return = -46.91


 33%|███████████▌                       | 2651/8000 [2:22:37<3:11:21,  2.15s/it]

Episode 2651/8000, real env return = -50.85


 33%|███████████▋                       | 2661/8000 [2:22:57<2:51:04,  1.92s/it]

Episode 2661/8000, real env return = -55.69


 33%|███████████▋                       | 2671/8000 [2:23:16<3:07:22,  2.11s/it]

Episode 2671/8000, real env return = -54.95


 34%|███████████▋                       | 2681/8000 [2:23:38<3:36:08,  2.44s/it]

Episode 2681/8000, real env return = -45.69


 34%|███████████▊                       | 2691/8000 [2:23:57<2:48:12,  1.90s/it]

Episode 2691/8000, real env return = -54.33


 34%|███████████▊                       | 2701/8000 [2:24:19<3:27:50,  2.35s/it]

Episode 2701/8000, real env return = -46.15


 34%|███████████▊                       | 2711/8000 [2:24:40<3:07:43,  2.13s/it]

Episode 2711/8000, real env return = -51.91


 34%|███████████▉                       | 2721/8000 [2:25:02<3:14:57,  2.22s/it]

Episode 2721/8000, real env return = -37.44


 34%|███████████▉                       | 2731/8000 [2:25:25<3:24:46,  2.33s/it]

Episode 2731/8000, real env return = -48.18


 34%|███████████▉                       | 2741/8000 [2:25:44<2:59:24,  2.05s/it]

Episode 2741/8000, real env return = -37.02


 34%|████████████                       | 2751/8000 [2:26:06<3:04:31,  2.11s/it]

Episode 2751/8000, real env return = -46.13


 35%|████████████                       | 2761/8000 [2:26:29<3:16:48,  2.25s/it]

Episode 2761/8000, real env return = -43.01


 35%|████████████                       | 2771/8000 [2:26:52<3:30:19,  2.41s/it]

Episode 2771/8000, real env return = -49.30


 35%|████████████▏                      | 2781/8000 [2:27:11<2:45:48,  1.91s/it]

Episode 2781/8000, real env return = -48.20


 35%|████████████▏                      | 2791/8000 [2:27:29<2:30:54,  1.74s/it]

Episode 2791/8000, real env return = -42.83


 35%|████████████▎                      | 2801/8000 [2:27:51<2:45:52,  1.91s/it]

Episode 2801/8000, real env return = -53.05


 35%|████████████▎                      | 2811/8000 [2:28:12<2:48:20,  1.95s/it]

Episode 2811/8000, real env return = -48.24


 35%|████████████▎                      | 2821/8000 [2:28:32<2:49:18,  1.96s/it]

Episode 2821/8000, real env return = -39.03


 35%|████████████▍                      | 2831/8000 [2:28:51<2:37:27,  1.83s/it]

Episode 2831/8000, real env return = -45.88


 36%|████████████▍                      | 2841/8000 [2:29:15<3:23:01,  2.36s/it]

Episode 2841/8000, real env return = -45.70


 36%|████████████▍                      | 2851/8000 [2:29:41<3:42:30,  2.59s/it]

Episode 2851/8000, real env return = -54.45


 36%|████████████▌                      | 2861/8000 [2:30:05<3:42:39,  2.60s/it]

Episode 2861/8000, real env return = -42.13


 36%|████████████▌                      | 2871/8000 [2:30:24<3:02:37,  2.14s/it]

Episode 2871/8000, real env return = -38.06


 36%|████████████▌                      | 2881/8000 [2:30:48<3:16:04,  2.30s/it]

Episode 2881/8000, real env return = -37.42


 36%|████████████▋                      | 2891/8000 [2:31:09<3:14:08,  2.28s/it]

Episode 2891/8000, real env return = -35.67


 36%|████████████▋                      | 2901/8000 [2:31:29<2:42:10,  1.91s/it]

Episode 2901/8000, real env return = -43.56


 36%|████████████▋                      | 2911/8000 [2:31:52<3:06:20,  2.20s/it]

Episode 2911/8000, real env return = -50.28


 37%|████████████▊                      | 2921/8000 [2:32:12<2:38:50,  1.88s/it]

Episode 2921/8000, real env return = -67.04


 37%|████████████▊                      | 2931/8000 [2:32:34<2:48:50,  2.00s/it]

Episode 2931/8000, real env return = -53.96


 37%|████████████▊                      | 2941/8000 [2:32:54<2:45:20,  1.96s/it]

Episode 2941/8000, real env return = -33.08


 37%|████████████▉                      | 2951/8000 [2:33:15<3:02:14,  2.17s/it]

Episode 2951/8000, real env return = -43.07


 37%|████████████▉                      | 2961/8000 [2:33:36<2:51:44,  2.04s/it]

Episode 2961/8000, real env return = -41.92


 37%|████████████▉                      | 2971/8000 [2:33:58<3:12:25,  2.30s/it]

Episode 2971/8000, real env return = -47.33


 37%|█████████████                      | 2981/8000 [2:34:18<2:38:03,  1.89s/it]

Episode 2981/8000, real env return = -49.25


 37%|█████████████                      | 2991/8000 [2:34:40<2:47:51,  2.01s/it]

Episode 2991/8000, real env return = -45.07


 38%|█████████████▏                     | 3001/8000 [2:35:00<2:34:08,  1.85s/it]

Episode 3001/8000, real env return = -46.98


 38%|█████████████▏                     | 3011/8000 [2:35:21<3:07:00,  2.25s/it]

Episode 3011/8000, real env return = -51.38


 38%|█████████████▏                     | 3021/8000 [2:35:41<2:56:34,  2.13s/it]

Episode 3021/8000, real env return = -54.56


 38%|█████████████▎                     | 3031/8000 [2:36:02<2:52:02,  2.08s/it]

Episode 3031/8000, real env return = -63.94


 38%|█████████████▎                     | 3041/8000 [2:36:24<2:51:50,  2.08s/it]

Episode 3041/8000, real env return = -48.81


 38%|█████████████▎                     | 3051/8000 [2:36:47<3:22:57,  2.46s/it]

Episode 3051/8000, real env return = -35.75


 38%|█████████████▍                     | 3061/8000 [2:37:09<3:02:14,  2.21s/it]

Episode 3061/8000, real env return = -60.29


 38%|█████████████▍                     | 3071/8000 [2:37:29<2:35:40,  1.90s/it]

Episode 3071/8000, real env return = -40.81


 39%|█████████████▍                     | 3081/8000 [2:37:49<2:44:08,  2.00s/it]

Episode 3081/8000, real env return = -83.04


 39%|█████████████▌                     | 3091/8000 [2:38:10<2:49:17,  2.07s/it]

Episode 3091/8000, real env return = -43.16


 39%|█████████████▌                     | 3101/8000 [2:38:31<2:59:41,  2.20s/it]

Episode 3101/8000, real env return = -43.76


 39%|█████████████▌                     | 3111/8000 [2:38:51<2:25:08,  1.78s/it]

Episode 3111/8000, real env return = -63.95


 39%|█████████████▋                     | 3121/8000 [2:39:13<3:01:32,  2.23s/it]

Episode 3121/8000, real env return = -66.35


 39%|█████████████▋                     | 3131/8000 [2:39:35<2:55:22,  2.16s/it]

Episode 3131/8000, real env return = -70.13


 39%|█████████████▋                     | 3141/8000 [2:40:00<3:19:14,  2.46s/it]

Episode 3141/8000, real env return = -58.80


 39%|█████████████▊                     | 3151/8000 [2:40:23<2:55:24,  2.17s/it]

Episode 3151/8000, real env return = -44.52


 40%|█████████████▊                     | 3161/8000 [2:40:42<2:43:57,  2.03s/it]

Episode 3161/8000, real env return = -43.46


 40%|█████████████▊                     | 3171/8000 [2:41:06<3:25:03,  2.55s/it]

Episode 3171/8000, real env return = -51.90


 40%|█████████████▉                     | 3181/8000 [2:41:29<3:12:48,  2.40s/it]

Episode 3181/8000, real env return = -56.87


 40%|█████████████▉                     | 3191/8000 [2:41:54<3:14:25,  2.43s/it]

Episode 3191/8000, real env return = -113.83


 40%|██████████████                     | 3201/8000 [2:42:15<2:50:07,  2.13s/it]

Episode 3201/8000, real env return = -41.53


 40%|██████████████                     | 3211/8000 [2:42:39<3:06:40,  2.34s/it]

Episode 3211/8000, real env return = -50.84


 40%|██████████████                     | 3221/8000 [2:43:02<2:56:36,  2.22s/it]

Episode 3221/8000, real env return = -47.41


 40%|██████████████▏                    | 3231/8000 [2:43:23<2:31:34,  1.91s/it]

Episode 3231/8000, real env return = -43.73


 41%|██████████████▏                    | 3241/8000 [2:43:47<3:05:51,  2.34s/it]

Episode 3241/8000, real env return = -43.77


 41%|██████████████▏                    | 3251/8000 [2:44:11<3:07:33,  2.37s/it]

Episode 3251/8000, real env return = -45.16


 41%|██████████████▎                    | 3261/8000 [2:44:33<2:41:31,  2.05s/it]

Episode 3261/8000, real env return = -58.98


 41%|██████████████▎                    | 3271/8000 [2:44:53<2:37:37,  2.00s/it]

Episode 3271/8000, real env return = -58.37


 41%|██████████████▎                    | 3281/8000 [2:45:16<2:47:52,  2.13s/it]

Episode 3281/8000, real env return = -87.77


 41%|██████████████▍                    | 3291/8000 [2:45:40<2:38:57,  2.03s/it]

Episode 3291/8000, real env return = -76.31


 41%|██████████████▍                    | 3301/8000 [2:46:02<2:44:52,  2.11s/it]

Episode 3301/8000, real env return = -64.14


 41%|██████████████▍                    | 3311/8000 [2:46:27<3:39:49,  2.81s/it]

Episode 3311/8000, real env return = -77.57


 42%|██████████████▌                    | 3321/8000 [2:46:47<2:24:06,  1.85s/it]

Episode 3321/8000, real env return = -97.67


 42%|██████████████▌                    | 3331/8000 [2:47:08<2:44:57,  2.12s/it]

Episode 3331/8000, real env return = -101.17


 42%|██████████████▌                    | 3341/8000 [2:47:30<2:44:48,  2.12s/it]

Episode 3341/8000, real env return = -96.58


 42%|██████████████▋                    | 3351/8000 [2:47:56<3:20:02,  2.58s/it]

Episode 3351/8000, real env return = -95.50


 42%|██████████████▋                    | 3361/8000 [2:48:21<3:15:40,  2.53s/it]

Episode 3361/8000, real env return = -102.59


 42%|██████████████▋                    | 3371/8000 [2:48:45<2:57:40,  2.30s/it]

Episode 3371/8000, real env return = -106.29


 42%|██████████████▊                    | 3381/8000 [2:49:07<2:52:13,  2.24s/it]

Episode 3381/8000, real env return = -100.63


 42%|██████████████▊                    | 3391/8000 [2:49:29<2:57:41,  2.31s/it]

Episode 3391/8000, real env return = -133.63


 43%|██████████████▉                    | 3401/8000 [2:49:53<3:14:28,  2.54s/it]

Episode 3401/8000, real env return = -125.90


 43%|██████████████▉                    | 3411/8000 [2:50:18<3:11:10,  2.50s/it]

Episode 3411/8000, real env return = -105.25


 43%|██████████████▉                    | 3421/8000 [2:50:44<3:42:49,  2.92s/it]

Episode 3421/8000, real env return = -109.31


 43%|███████████████                    | 3431/8000 [2:51:09<3:21:18,  2.64s/it]

Episode 3431/8000, real env return = -140.17


 43%|███████████████                    | 3441/8000 [2:51:32<2:55:33,  2.31s/it]

Episode 3441/8000, real env return = -138.15


 43%|███████████████                    | 3451/8000 [2:51:59<3:31:27,  2.79s/it]

Episode 3451/8000, real env return = -137.07


 43%|███████████████▏                   | 3461/8000 [2:52:24<3:19:36,  2.64s/it]

Episode 3461/8000, real env return = -105.98


 43%|███████████████▏                   | 3471/8000 [2:52:48<2:58:59,  2.37s/it]

Episode 3471/8000, real env return = -133.89


 44%|███████████████▏                   | 3481/8000 [2:53:10<2:30:01,  1.99s/it]

Episode 3481/8000, real env return = -98.28


 44%|███████████████▎                   | 3491/8000 [2:53:34<3:03:49,  2.45s/it]

Episode 3491/8000, real env return = -105.46


 44%|███████████████▎                   | 3501/8000 [2:53:59<3:01:01,  2.41s/it]

Episode 3501/8000, real env return = -84.76


 44%|███████████████▎                   | 3511/8000 [2:54:21<2:44:11,  2.19s/it]

Episode 3511/8000, real env return = -85.98


 44%|███████████████▍                   | 3521/8000 [2:54:46<3:07:51,  2.52s/it]

Episode 3521/8000, real env return = -93.98


 44%|███████████████▍                   | 3531/8000 [2:55:09<2:38:46,  2.13s/it]

Episode 3531/8000, real env return = -94.79


 44%|███████████████▍                   | 3541/8000 [2:55:32<2:36:15,  2.10s/it]

Episode 3541/8000, real env return = -103.45


 44%|███████████████▌                   | 3551/8000 [2:55:54<2:58:37,  2.41s/it]

Episode 3551/8000, real env return = -79.26


 45%|███████████████▌                   | 3561/8000 [2:56:16<2:26:13,  1.98s/it]

Episode 3561/8000, real env return = -69.77


 45%|███████████████▌                   | 3571/8000 [2:56:40<3:09:10,  2.56s/it]

Episode 3571/8000, real env return = -68.64


 45%|███████████████▋                   | 3581/8000 [2:57:05<2:48:58,  2.29s/it]

Episode 3581/8000, real env return = -76.16


 45%|███████████████▋                   | 3591/8000 [2:57:27<2:46:29,  2.27s/it]

Episode 3591/8000, real env return = -57.55


 45%|███████████████▊                   | 3601/8000 [2:57:53<3:24:43,  2.79s/it]

Episode 3601/8000, real env return = -66.85


 45%|███████████████▊                   | 3611/8000 [2:58:17<2:59:38,  2.46s/it]

Episode 3611/8000, real env return = -56.67


 45%|███████████████▊                   | 3621/8000 [2:58:45<3:18:25,  2.72s/it]

Episode 3621/8000, real env return = -42.36


 45%|███████████████▉                   | 3631/8000 [2:59:09<2:42:59,  2.24s/it]

Episode 3631/8000, real env return = -124.81


 46%|███████████████▉                   | 3641/8000 [2:59:34<2:52:42,  2.38s/it]

Episode 3641/8000, real env return = -64.67


 46%|███████████████▉                   | 3651/8000 [2:59:59<3:07:39,  2.59s/it]

Episode 3651/8000, real env return = -55.29


 46%|████████████████                   | 3661/8000 [3:00:24<3:06:35,  2.58s/it]

Episode 3661/8000, real env return = -46.83


 46%|████████████████                   | 3671/8000 [3:00:49<3:10:29,  2.64s/it]

Episode 3671/8000, real env return = -35.50


 46%|████████████████                   | 3681/8000 [3:01:13<3:02:35,  2.54s/it]

Episode 3681/8000, real env return = -35.50


 46%|████████████████▏                  | 3691/8000 [3:01:38<2:44:48,  2.29s/it]

Episode 3691/8000, real env return = -31.58


 46%|████████████████▏                  | 3701/8000 [3:02:00<2:35:52,  2.18s/it]

Episode 3701/8000, real env return = -28.41


 46%|████████████████▏                  | 3711/8000 [3:02:28<3:25:22,  2.87s/it]

Episode 3711/8000, real env return = -33.63


 47%|████████████████▎                  | 3721/8000 [3:02:55<3:09:03,  2.65s/it]

Episode 3721/8000, real env return = -29.04


 47%|████████████████▎                  | 3731/8000 [3:03:20<2:47:20,  2.35s/it]

Episode 3731/8000, real env return = -39.52


 47%|████████████████▎                  | 3741/8000 [3:03:40<2:26:49,  2.07s/it]

Episode 3741/8000, real env return = -47.49


 47%|████████████████▍                  | 3751/8000 [3:04:06<2:54:00,  2.46s/it]

Episode 3751/8000, real env return = -30.99


 47%|████████████████▍                  | 3761/8000 [3:04:29<2:51:55,  2.43s/it]

Episode 3761/8000, real env return = -40.42


 47%|████████████████▍                  | 3771/8000 [3:04:51<2:52:30,  2.45s/it]

Episode 3771/8000, real env return = -28.89


 47%|████████████████▌                  | 3781/8000 [3:05:14<3:11:17,  2.72s/it]

Episode 3781/8000, real env return = -30.79


 47%|████████████████▌                  | 3791/8000 [3:05:40<2:44:35,  2.35s/it]

Episode 3791/8000, real env return = -36.70


 48%|████████████████▋                  | 3801/8000 [3:06:02<2:18:40,  1.98s/it]

Episode 3801/8000, real env return = -27.66


 48%|████████████████▋                  | 3811/8000 [3:06:26<2:53:42,  2.49s/it]

Episode 3811/8000, real env return = -35.17


 48%|████████████████▋                  | 3821/8000 [3:06:49<2:33:27,  2.20s/it]

Episode 3821/8000, real env return = -28.06


 48%|████████████████▊                  | 3831/8000 [3:07:15<2:57:17,  2.55s/it]

Episode 3831/8000, real env return = -28.77


 48%|████████████████▊                  | 3841/8000 [3:07:38<2:19:52,  2.02s/it]

Episode 3841/8000, real env return = -33.84


 48%|████████████████▊                  | 3851/8000 [3:08:02<2:56:05,  2.55s/it]

Episode 3851/8000, real env return = -42.60


 48%|████████████████▉                  | 3861/8000 [3:08:24<2:53:44,  2.52s/it]

Episode 3861/8000, real env return = -50.61


 48%|████████████████▉                  | 3871/8000 [3:08:46<2:45:16,  2.40s/it]

Episode 3871/8000, real env return = -35.75


 49%|████████████████▉                  | 3881/8000 [3:09:07<2:13:25,  1.94s/it]

Episode 3881/8000, real env return = -23.55


 49%|█████████████████                  | 3891/8000 [3:09:31<2:42:40,  2.38s/it]

Episode 3891/8000, real env return = -34.16


 49%|█████████████████                  | 3901/8000 [3:09:56<2:50:10,  2.49s/it]

Episode 3901/8000, real env return = -28.34


 49%|█████████████████                  | 3911/8000 [3:10:20<2:45:46,  2.43s/it]

Episode 3911/8000, real env return = -30.81


 49%|█████████████████▏                 | 3921/8000 [3:10:41<2:23:20,  2.11s/it]

Episode 3921/8000, real env return = -33.43


 49%|█████████████████▏                 | 3931/8000 [3:11:08<3:08:56,  2.79s/it]

Episode 3931/8000, real env return = -66.29


 49%|█████████████████▏                 | 3941/8000 [3:11:33<2:48:57,  2.50s/it]

Episode 3941/8000, real env return = -34.22


 49%|█████████████████▎                 | 3951/8000 [3:12:01<2:48:15,  2.49s/it]

Episode 3951/8000, real env return = -31.62


 50%|█████████████████▎                 | 3961/8000 [3:12:25<2:49:39,  2.52s/it]

Episode 3961/8000, real env return = -35.87


 50%|█████████████████▎                 | 3971/8000 [3:12:48<2:36:47,  2.34s/it]

Episode 3971/8000, real env return = -47.61


 50%|█████████████████▍                 | 3981/8000 [3:13:11<2:53:21,  2.59s/it]

Episode 3981/8000, real env return = -33.56


 50%|█████████████████▍                 | 3991/8000 [3:13:36<2:57:07,  2.65s/it]

Episode 3991/8000, real env return = -40.79


 50%|█████████████████▌                 | 4001/8000 [3:14:03<2:52:44,  2.59s/it]

Episode 4001/8000, real env return = -43.99


 50%|█████████████████▌                 | 4011/8000 [3:14:30<2:57:54,  2.68s/it]

Episode 4011/8000, real env return = -42.87


 50%|█████████████████▌                 | 4021/8000 [3:14:53<2:36:33,  2.36s/it]

Episode 4021/8000, real env return = -37.70


 50%|█████████████████▋                 | 4031/8000 [3:15:16<2:29:15,  2.26s/it]

Episode 4031/8000, real env return = -40.29


 51%|█████████████████▋                 | 4041/8000 [3:15:41<2:40:25,  2.43s/it]

Episode 4041/8000, real env return = -36.41


 51%|█████████████████▋                 | 4051/8000 [3:16:03<2:18:41,  2.11s/it]

Episode 4051/8000, real env return = -32.46


 51%|█████████████████▊                 | 4061/8000 [3:16:31<3:07:58,  2.86s/it]

Episode 4061/8000, real env return = -32.15


 51%|█████████████████▊                 | 4071/8000 [3:16:52<2:20:49,  2.15s/it]

Episode 4071/8000, real env return = -39.49


 51%|█████████████████▊                 | 4081/8000 [3:17:17<2:53:40,  2.66s/it]

Episode 4081/8000, real env return = -40.14


 51%|█████████████████▉                 | 4091/8000 [3:17:43<3:00:16,  2.77s/it]

Episode 4091/8000, real env return = -68.54


 51%|█████████████████▉                 | 4101/8000 [3:18:09<2:45:39,  2.55s/it]

Episode 4101/8000, real env return = -43.30


 51%|█████████████████▉                 | 4111/8000 [3:18:33<2:42:05,  2.50s/it]

Episode 4111/8000, real env return = -30.79


 52%|██████████████████                 | 4121/8000 [3:18:56<2:19:33,  2.16s/it]

Episode 4121/8000, real env return = -39.67


 52%|██████████████████                 | 4131/8000 [3:19:22<2:38:38,  2.46s/it]

Episode 4131/8000, real env return = -32.27


 52%|██████████████████                 | 4141/8000 [3:19:47<2:40:55,  2.50s/it]

Episode 4141/8000, real env return = -35.29


 52%|██████████████████▏                | 4151/8000 [3:20:13<2:44:30,  2.56s/it]

Episode 4151/8000, real env return = -34.37


 52%|██████████████████▏                | 4161/8000 [3:20:39<2:59:40,  2.81s/it]

Episode 4161/8000, real env return = -50.18


 52%|██████████████████▏                | 4171/8000 [3:21:07<2:55:50,  2.76s/it]

Episode 4171/8000, real env return = -37.76


 52%|██████████████████▎                | 4181/8000 [3:21:32<2:46:50,  2.62s/it]

Episode 4181/8000, real env return = -40.59


 52%|██████████████████▎                | 4191/8000 [3:22:00<2:54:24,  2.75s/it]

Episode 4191/8000, real env return = -57.41


 53%|██████████████████▍                | 4201/8000 [3:22:28<2:53:42,  2.74s/it]

Episode 4201/8000, real env return = -39.06


 53%|██████████████████▍                | 4211/8000 [3:22:53<2:41:17,  2.55s/it]

Episode 4211/8000, real env return = -30.06


 53%|██████████████████▍                | 4221/8000 [3:23:18<2:38:34,  2.52s/it]

Episode 4221/8000, real env return = -27.63


 53%|██████████████████▌                | 4231/8000 [3:23:40<2:18:06,  2.20s/it]

Episode 4231/8000, real env return = -30.36


 53%|██████████████████▌                | 4241/8000 [3:24:07<2:54:54,  2.79s/it]

Episode 4241/8000, real env return = -27.24


 53%|██████████████████▌                | 4251/8000 [3:24:33<2:35:21,  2.49s/it]

Episode 4251/8000, real env return = -38.63


 53%|██████████████████▋                | 4261/8000 [3:24:57<2:46:03,  2.66s/it]

Episode 4261/8000, real env return = -36.01


 53%|██████████████████▋                | 4271/8000 [3:25:23<2:47:47,  2.70s/it]

Episode 4271/8000, real env return = -27.99


 54%|██████████████████▋                | 4281/8000 [3:25:47<2:19:26,  2.25s/it]

Episode 4281/8000, real env return = -28.98


 54%|██████████████████▊                | 4291/8000 [3:26:12<2:49:07,  2.74s/it]

Episode 4291/8000, real env return = -40.07


 54%|██████████████████▊                | 4301/8000 [3:26:36<2:37:51,  2.56s/it]

Episode 4301/8000, real env return = -25.94


 54%|██████████████████▊                | 4311/8000 [3:27:01<2:41:17,  2.62s/it]

Episode 4311/8000, real env return = -26.46


 54%|██████████████████▉                | 4321/8000 [3:27:26<2:44:24,  2.68s/it]

Episode 4321/8000, real env return = -36.26


 54%|██████████████████▉                | 4331/8000 [3:27:50<2:13:20,  2.18s/it]

Episode 4331/8000, real env return = -36.66


 54%|██████████████████▉                | 4341/8000 [3:28:16<2:28:41,  2.44s/it]

Episode 4341/8000, real env return = -30.60


 54%|███████████████████                | 4351/8000 [3:28:44<2:38:25,  2.60s/it]

Episode 4351/8000, real env return = -27.16


 55%|███████████████████                | 4361/8000 [3:29:08<2:19:54,  2.31s/it]

Episode 4361/8000, real env return = -35.94


 55%|███████████████████                | 4371/8000 [3:29:33<2:33:40,  2.54s/it]

Episode 4371/8000, real env return = -29.72


 55%|███████████████████▏               | 4381/8000 [3:29:59<2:26:56,  2.44s/it]

Episode 4381/8000, real env return = -18.61


 55%|███████████████████▏               | 4391/8000 [3:30:25<2:50:17,  2.83s/it]

Episode 4391/8000, real env return = -13.57


 55%|███████████████████▎               | 4401/8000 [3:30:49<2:09:33,  2.16s/it]

Episode 4401/8000, real env return = -38.02


 55%|███████████████████▎               | 4411/8000 [3:31:14<2:22:22,  2.38s/it]

Episode 4411/8000, real env return = -23.31


 55%|███████████████████▎               | 4421/8000 [3:31:41<2:31:13,  2.54s/it]

Episode 4421/8000, real env return = -21.98


 55%|███████████████████▍               | 4431/8000 [3:32:06<2:28:28,  2.50s/it]

Episode 4431/8000, real env return = -35.64


 56%|███████████████████▍               | 4441/8000 [3:32:31<2:25:15,  2.45s/it]

Episode 4441/8000, real env return = -35.05


 56%|███████████████████▍               | 4451/8000 [3:32:57<2:37:56,  2.67s/it]

Episode 4451/8000, real env return = -34.34


 56%|███████████████████▌               | 4461/8000 [3:33:26<2:56:19,  2.99s/it]

Episode 4461/8000, real env return = -29.19


 56%|███████████████████▌               | 4471/8000 [3:33:51<2:32:28,  2.59s/it]

Episode 4471/8000, real env return = -31.43


 56%|███████████████████▌               | 4481/8000 [3:34:16<2:34:22,  2.63s/it]

Episode 4481/8000, real env return = -33.11


 56%|███████████████████▋               | 4491/8000 [3:34:44<2:25:42,  2.49s/it]

Episode 4491/8000, real env return = -43.22


 56%|███████████████████▋               | 4501/8000 [3:35:08<2:14:18,  2.30s/it]

Episode 4501/8000, real env return = -96.11


 56%|███████████████████▋               | 4511/8000 [3:35:34<2:29:56,  2.58s/it]

Episode 4511/8000, real env return = -89.41


 57%|███████████████████▊               | 4521/8000 [3:36:02<2:55:54,  3.03s/it]

Episode 4521/8000, real env return = -96.16


 57%|███████████████████▊               | 4531/8000 [3:36:30<2:43:22,  2.83s/it]

Episode 4531/8000, real env return = -76.89


 57%|███████████████████▊               | 4541/8000 [3:36:53<2:20:38,  2.44s/it]

Episode 4541/8000, real env return = -88.77


 57%|███████████████████▉               | 4551/8000 [3:37:19<2:16:28,  2.37s/it]

Episode 4551/8000, real env return = -87.95


 57%|███████████████████▉               | 4561/8000 [3:37:46<2:16:16,  2.38s/it]

Episode 4561/8000, real env return = -94.04


 57%|███████████████████▉               | 4571/8000 [3:38:13<2:29:14,  2.61s/it]

Episode 4571/8000, real env return = -103.92


 57%|████████████████████               | 4581/8000 [3:38:37<2:06:12,  2.21s/it]

Episode 4581/8000, real env return = -97.28


 57%|████████████████████               | 4591/8000 [3:39:04<2:51:26,  3.02s/it]

Episode 4591/8000, real env return = -84.06


 58%|████████████████████▏              | 4601/8000 [3:39:29<2:24:12,  2.55s/it]

Episode 4601/8000, real env return = -87.08


 58%|████████████████████▏              | 4611/8000 [3:39:57<2:32:42,  2.70s/it]

Episode 4611/8000, real env return = -97.53


 58%|████████████████████▏              | 4621/8000 [3:40:23<2:22:22,  2.53s/it]

Episode 4621/8000, real env return = -87.98


 58%|████████████████████▎              | 4631/8000 [3:40:52<2:34:24,  2.75s/it]

Episode 4631/8000, real env return = -103.75


 58%|████████████████████▎              | 4641/8000 [3:41:19<2:24:12,  2.58s/it]

Episode 4641/8000, real env return = -88.88


 58%|████████████████████▎              | 4651/8000 [3:41:47<2:33:13,  2.75s/it]

Episode 4651/8000, real env return = -65.41


 58%|████████████████████▍              | 4661/8000 [3:42:15<2:39:14,  2.86s/it]

Episode 4661/8000, real env return = -43.24


 58%|████████████████████▍              | 4671/8000 [3:42:42<2:39:28,  2.87s/it]

Episode 4671/8000, real env return = -47.66


 59%|████████████████████▍              | 4681/8000 [3:43:12<2:53:22,  3.13s/it]

Episode 4681/8000, real env return = -72.70


 59%|████████████████████▌              | 4691/8000 [3:43:38<2:28:47,  2.70s/it]

Episode 4691/8000, real env return = -83.56


 59%|████████████████████▌              | 4701/8000 [3:44:03<2:35:30,  2.83s/it]

Episode 4701/8000, real env return = -77.47


 59%|████████████████████▌              | 4711/8000 [3:44:30<2:36:19,  2.85s/it]

Episode 4711/8000, real env return = -85.69


 59%|████████████████████▋              | 4721/8000 [3:44:57<2:17:11,  2.51s/it]

Episode 4721/8000, real env return = -75.69


 59%|████████████████████▋              | 4731/8000 [3:45:22<2:25:31,  2.67s/it]

Episode 4731/8000, real env return = -66.72


 59%|████████████████████▋              | 4741/8000 [3:45:48<2:25:57,  2.69s/it]

Episode 4741/8000, real env return = -74.46


 59%|████████████████████▊              | 4751/8000 [3:46:15<2:25:41,  2.69s/it]

Episode 4751/8000, real env return = -74.84


 60%|████████████████████▊              | 4761/8000 [3:46:41<2:23:30,  2.66s/it]

Episode 4761/8000, real env return = -73.80


 60%|████████████████████▊              | 4771/8000 [3:47:06<2:01:03,  2.25s/it]

Episode 4771/8000, real env return = -39.49


 60%|████████████████████▉              | 4781/8000 [3:47:29<1:59:10,  2.22s/it]

Episode 4781/8000, real env return = -109.72


 60%|████████████████████▉              | 4791/8000 [3:47:54<2:21:50,  2.65s/it]

Episode 4791/8000, real env return = -45.74


 60%|█████████████████████              | 4801/8000 [3:48:21<2:22:45,  2.68s/it]

Episode 4801/8000, real env return = -32.85


 60%|█████████████████████              | 4811/8000 [3:48:44<2:06:29,  2.38s/it]

Episode 4811/8000, real env return = -38.85


 60%|█████████████████████              | 4821/8000 [3:49:10<2:29:16,  2.82s/it]

Episode 4821/8000, real env return = -86.14


 60%|█████████████████████▏             | 4831/8000 [3:49:40<2:40:09,  3.03s/it]

Episode 4831/8000, real env return = -28.55


 61%|█████████████████████▏             | 4841/8000 [3:50:08<2:19:29,  2.65s/it]

Episode 4841/8000, real env return = -112.60


 61%|█████████████████████▏             | 4851/8000 [3:50:28<1:51:45,  2.13s/it]

Episode 4851/8000, real env return = -32.53


 61%|█████████████████████▎             | 4861/8000 [3:50:50<2:08:31,  2.46s/it]

Episode 4861/8000, real env return = -94.51


 61%|█████████████████████▎             | 4871/8000 [3:51:10<1:54:57,  2.20s/it]

Episode 4871/8000, real env return = -76.71


 61%|█████████████████████▎             | 4881/8000 [3:51:34<2:10:31,  2.51s/it]

Episode 4881/8000, real env return = -41.36


 61%|█████████████████████▍             | 4891/8000 [3:52:03<2:35:29,  3.00s/it]

Episode 4891/8000, real env return = -42.85


 61%|█████████████████████▍             | 4901/8000 [3:52:33<2:32:20,  2.95s/it]

Episode 4901/8000, real env return = -50.72


 61%|█████████████████████▍             | 4911/8000 [3:52:59<2:11:02,  2.55s/it]

Episode 4911/8000, real env return = -36.56


 62%|█████████████████████▌             | 4921/8000 [3:53:25<2:15:25,  2.64s/it]

Episode 4921/8000, real env return = -35.77


 62%|█████████████████████▌             | 4931/8000 [3:53:52<2:20:43,  2.75s/it]

Episode 4931/8000, real env return = -27.68


 62%|█████████████████████▌             | 4941/8000 [3:54:18<2:06:33,  2.48s/it]

Episode 4941/8000, real env return = -28.93


 62%|█████████████████████▋             | 4951/8000 [3:54:45<2:00:53,  2.38s/it]

Episode 4951/8000, real env return = -38.05


 62%|█████████████████████▋             | 4961/8000 [3:55:09<1:59:08,  2.35s/it]

Episode 4961/8000, real env return = -6.18


 62%|█████████████████████▋             | 4971/8000 [3:55:36<2:01:06,  2.40s/it]

Episode 4971/8000, real env return = -33.88


 62%|█████████████████████▊             | 4981/8000 [3:56:03<2:22:26,  2.83s/it]

Episode 4981/8000, real env return = -31.49


 62%|█████████████████████▊             | 4991/8000 [3:56:32<2:14:26,  2.68s/it]

Episode 4991/8000, real env return = -26.86


 63%|█████████████████████▉             | 5001/8000 [3:57:01<2:22:47,  2.86s/it]

Episode 5001/8000, real env return = -34.35


 63%|█████████████████████▉             | 5011/8000 [3:57:28<2:05:27,  2.52s/it]

Episode 5011/8000, real env return = -44.66


 63%|█████████████████████▉             | 5021/8000 [3:57:53<2:00:56,  2.44s/it]

Episode 5021/8000, real env return = -122.31


 63%|██████████████████████             | 5031/8000 [3:58:21<2:15:56,  2.75s/it]

Episode 5031/8000, real env return = -43.12


 63%|██████████████████████             | 5041/8000 [3:58:50<2:18:47,  2.81s/it]

Episode 5041/8000, real env return = -61.96


 63%|██████████████████████             | 5051/8000 [3:59:18<2:25:56,  2.97s/it]

Episode 5051/8000, real env return = -38.74


 63%|██████████████████████▏            | 5061/8000 [3:59:45<2:05:49,  2.57s/it]

Episode 5061/8000, real env return = -63.39


 63%|██████████████████████▏            | 5071/8000 [4:00:11<1:54:44,  2.35s/it]

Episode 5071/8000, real env return = -51.72


 64%|██████████████████████▏            | 5081/8000 [4:00:36<2:00:40,  2.48s/it]

Episode 5081/8000, real env return = -62.98


 64%|██████████████████████▎            | 5091/8000 [4:01:03<2:10:33,  2.69s/it]

Episode 5091/8000, real env return = -91.72


 64%|██████████████████████▎            | 5101/8000 [4:01:28<2:07:26,  2.64s/it]

Episode 5101/8000, real env return = -77.29


 64%|██████████████████████▎            | 5111/8000 [4:01:54<1:52:02,  2.33s/it]

Episode 5111/8000, real env return = -103.70


 64%|██████████████████████▍            | 5121/8000 [4:02:14<1:28:13,  1.84s/it]

Episode 5121/8000, real env return = -104.78


 64%|██████████████████████▍            | 5131/8000 [4:02:41<2:14:46,  2.82s/it]

Episode 5131/8000, real env return = -55.52


 64%|██████████████████████▍            | 5141/8000 [4:03:08<2:15:47,  2.85s/it]

Episode 5141/8000, real env return = -59.95


 64%|██████████████████████▌            | 5151/8000 [4:03:33<2:00:51,  2.55s/it]

Episode 5151/8000, real env return = -61.12


 65%|██████████████████████▌            | 5161/8000 [4:04:00<2:07:01,  2.68s/it]

Episode 5161/8000, real env return = -62.95


 65%|██████████████████████▌            | 5171/8000 [4:04:25<1:53:38,  2.41s/it]

Episode 5171/8000, real env return = -53.91


 65%|██████████████████████▋            | 5181/8000 [4:04:54<2:20:14,  2.98s/it]

Episode 5181/8000, real env return = -55.59


 65%|██████████████████████▋            | 5191/8000 [4:05:22<1:58:09,  2.52s/it]

Episode 5191/8000, real env return = -58.65


 65%|██████████████████████▊            | 5201/8000 [4:05:44<1:49:20,  2.34s/it]

Episode 5201/8000, real env return = -58.46


 65%|██████████████████████▊            | 5211/8000 [4:06:12<2:10:25,  2.81s/it]

Episode 5211/8000, real env return = -56.32


 65%|██████████████████████▊            | 5221/8000 [4:06:41<2:08:58,  2.78s/it]

Episode 5221/8000, real env return = -41.97


 65%|██████████████████████▉            | 5231/8000 [4:07:06<1:57:50,  2.55s/it]

Episode 5231/8000, real env return = -35.48


 66%|██████████████████████▉            | 5241/8000 [4:07:34<2:09:05,  2.81s/it]

Episode 5241/8000, real env return = -28.62


 66%|██████████████████████▉            | 5251/8000 [4:08:00<2:10:30,  2.85s/it]

Episode 5251/8000, real env return = -43.32


 66%|███████████████████████            | 5261/8000 [4:08:25<1:58:42,  2.60s/it]

Episode 5261/8000, real env return = -98.30


 66%|███████████████████████            | 5271/8000 [4:08:51<2:09:48,  2.85s/it]

Episode 5271/8000, real env return = -44.39


 66%|███████████████████████            | 5281/8000 [4:09:16<1:57:16,  2.59s/it]

Episode 5281/8000, real env return = -35.62


 66%|███████████████████████▏           | 5291/8000 [4:09:43<1:56:06,  2.57s/it]

Episode 5291/8000, real env return = -27.75


 66%|███████████████████████▏           | 5301/8000 [4:10:08<2:02:36,  2.73s/it]

Episode 5301/8000, real env return = -32.97


 66%|███████████████████████▏           | 5311/8000 [4:10:33<1:57:22,  2.62s/it]

Episode 5311/8000, real env return = -26.28


 67%|███████████████████████▎           | 5321/8000 [4:10:57<1:46:49,  2.39s/it]

Episode 5321/8000, real env return = -34.91


 67%|███████████████████████▎           | 5331/8000 [4:11:22<1:46:21,  2.39s/it]

Episode 5331/8000, real env return = -40.96


 67%|███████████████████████▎           | 5341/8000 [4:11:49<2:02:24,  2.76s/it]

Episode 5341/8000, real env return = -40.55


 67%|███████████████████████▍           | 5351/8000 [4:12:14<1:54:14,  2.59s/it]

Episode 5351/8000, real env return = -48.04


 67%|███████████████████████▍           | 5361/8000 [4:12:40<1:51:18,  2.53s/it]

Episode 5361/8000, real env return = -44.56


 67%|███████████████████████▍           | 5371/8000 [4:13:05<1:43:43,  2.37s/it]

Episode 5371/8000, real env return = -41.63


 67%|███████████████████████▌           | 5381/8000 [4:13:31<2:01:03,  2.77s/it]

Episode 5381/8000, real env return = -35.66


 67%|███████████████████████▌           | 5391/8000 [4:13:57<1:44:09,  2.40s/it]

Episode 5391/8000, real env return = -41.48


 68%|███████████████████████▋           | 5401/8000 [4:14:23<1:49:11,  2.52s/it]

Episode 5401/8000, real env return = -39.94


 68%|███████████████████████▋           | 5411/8000 [4:14:49<2:04:46,  2.89s/it]

Episode 5411/8000, real env return = -40.32


 68%|███████████████████████▋           | 5421/8000 [4:15:16<2:00:13,  2.80s/it]

Episode 5421/8000, real env return = -59.94


 68%|███████████████████████▊           | 5431/8000 [4:15:42<1:50:08,  2.57s/it]

Episode 5431/8000, real env return = -33.27


 68%|███████████████████████▊           | 5441/8000 [4:16:05<1:44:09,  2.44s/it]

Episode 5441/8000, real env return = -41.14


 68%|███████████████████████▊           | 5451/8000 [4:16:31<1:58:19,  2.79s/it]

Episode 5451/8000, real env return = -38.68


 68%|███████████████████████▉           | 5461/8000 [4:17:00<1:54:11,  2.70s/it]

Episode 5461/8000, real env return = -41.20


 68%|███████████████████████▉           | 5471/8000 [4:17:26<1:41:44,  2.41s/it]

Episode 5471/8000, real env return = -34.05


 69%|███████████████████████▉           | 5481/8000 [4:17:51<1:42:10,  2.43s/it]

Episode 5481/8000, real env return = -48.03


 69%|████████████████████████           | 5491/8000 [4:18:18<1:50:38,  2.65s/it]

Episode 5491/8000, real env return = -35.16


 69%|████████████████████████           | 5501/8000 [4:18:44<1:43:54,  2.49s/it]

Episode 5501/8000, real env return = -38.27


 69%|████████████████████████           | 5511/8000 [4:19:10<1:50:07,  2.65s/it]

Episode 5511/8000, real env return = -35.56


 69%|████████████████████████▏          | 5521/8000 [4:19:37<1:45:15,  2.55s/it]

Episode 5521/8000, real env return = -36.15


 69%|████████████████████████▏          | 5531/8000 [4:20:05<1:53:22,  2.76s/it]

Episode 5531/8000, real env return = -33.92


 69%|████████████████████████▏          | 5541/8000 [4:20:32<1:46:19,  2.59s/it]

Episode 5541/8000, real env return = -38.77


 69%|████████████████████████▎          | 5551/8000 [4:21:01<1:59:10,  2.92s/it]

Episode 5551/8000, real env return = -43.00


 70%|████████████████████████▎          | 5561/8000 [4:21:24<1:40:15,  2.47s/it]

Episode 5561/8000, real env return = -127.61


 70%|████████████████████████▎          | 5571/8000 [4:21:51<1:48:41,  2.68s/it]

Episode 5571/8000, real env return = -50.91


 70%|████████████████████████▍          | 5581/8000 [4:22:17<1:47:23,  2.66s/it]

Episode 5581/8000, real env return = -39.32


 70%|████████████████████████▍          | 5591/8000 [4:22:42<1:33:06,  2.32s/it]

Episode 5591/8000, real env return = -45.77


 70%|████████████████████████▌          | 5601/8000 [4:23:11<1:52:08,  2.80s/it]

Episode 5601/8000, real env return = -32.04


 70%|████████████████████████▌          | 5611/8000 [4:23:37<1:33:04,  2.34s/it]

Episode 5611/8000, real env return = -31.05


 70%|████████████████████████▌          | 5621/8000 [4:24:06<1:59:19,  3.01s/it]

Episode 5621/8000, real env return = -36.13


 70%|████████████████████████▋          | 5631/8000 [4:24:32<1:46:29,  2.70s/it]

Episode 5631/8000, real env return = -40.92


 71%|████████████████████████▋          | 5641/8000 [4:24:59<1:38:41,  2.51s/it]

Episode 5641/8000, real env return = -35.43


 71%|████████████████████████▋          | 5651/8000 [4:25:26<1:44:32,  2.67s/it]

Episode 5651/8000, real env return = -29.22


 71%|████████████████████████▊          | 5661/8000 [4:25:51<1:29:38,  2.30s/it]

Episode 5661/8000, real env return = -38.29


 71%|████████████████████████▊          | 5671/8000 [4:26:19<1:42:36,  2.64s/it]

Episode 5671/8000, real env return = -34.56


 71%|████████████████████████▊          | 5681/8000 [4:26:43<1:24:20,  2.18s/it]

Episode 5681/8000, real env return = -34.76


 71%|████████████████████████▉          | 5691/8000 [4:27:11<1:47:57,  2.81s/it]

Episode 5691/8000, real env return = -40.16


 71%|████████████████████████▉          | 5701/8000 [4:27:35<1:40:49,  2.63s/it]

Episode 5701/8000, real env return = -35.80


 71%|████████████████████████▉          | 5711/8000 [4:28:02<1:37:52,  2.57s/it]

Episode 5711/8000, real env return = -44.49


 72%|█████████████████████████          | 5721/8000 [4:28:28<1:37:23,  2.56s/it]

Episode 5721/8000, real env return = -42.62


 72%|█████████████████████████          | 5731/8000 [4:28:52<1:32:32,  2.45s/it]

Episode 5731/8000, real env return = -103.71


 72%|█████████████████████████          | 5741/8000 [4:29:16<1:39:12,  2.64s/it]

Episode 5741/8000, real env return = -70.67


 72%|█████████████████████████▏         | 5751/8000 [4:29:45<1:39:41,  2.66s/it]

Episode 5751/8000, real env return = -66.97


 72%|█████████████████████████▏         | 5761/8000 [4:30:08<1:36:14,  2.58s/it]

Episode 5761/8000, real env return = -59.59


 72%|█████████████████████████▏         | 5771/8000 [4:30:35<1:42:34,  2.76s/it]

Episode 5771/8000, real env return = -33.77


 72%|█████████████████████████▎         | 5781/8000 [4:31:02<1:42:20,  2.77s/it]

Episode 5781/8000, real env return = -41.27


 72%|█████████████████████████▎         | 5791/8000 [4:31:29<1:45:20,  2.86s/it]

Episode 5791/8000, real env return = -76.95


 73%|█████████████████████████▍         | 5801/8000 [4:31:55<1:34:30,  2.58s/it]

Episode 5801/8000, real env return = -39.88


 73%|█████████████████████████▍         | 5811/8000 [4:32:22<1:43:52,  2.85s/it]

Episode 5811/8000, real env return = -30.80


 73%|█████████████████████████▍         | 5821/8000 [4:32:49<1:38:53,  2.72s/it]

Episode 5821/8000, real env return = -30.88


 73%|█████████████████████████▌         | 5831/8000 [4:33:16<1:37:43,  2.70s/it]

Episode 5831/8000, real env return = -22.23


 73%|█████████████████████████▌         | 5841/8000 [4:33:41<1:20:32,  2.24s/it]

Episode 5841/8000, real env return = -44.45


 73%|█████████████████████████▌         | 5851/8000 [4:34:05<1:36:15,  2.69s/it]

Episode 5851/8000, real env return = -37.69


 73%|█████████████████████████▋         | 5861/8000 [4:34:29<1:24:51,  2.38s/it]

Episode 5861/8000, real env return = -115.33


 73%|█████████████████████████▋         | 5871/8000 [4:34:53<1:21:30,  2.30s/it]

Episode 5871/8000, real env return = -125.96


 74%|█████████████████████████▋         | 5881/8000 [4:35:19<1:38:08,  2.78s/it]

Episode 5881/8000, real env return = -19.20


 74%|█████████████████████████▊         | 5891/8000 [4:35:44<1:28:51,  2.53s/it]

Episode 5891/8000, real env return = -40.49


 74%|█████████████████████████▊         | 5901/8000 [4:36:09<1:30:16,  2.58s/it]

Episode 5901/8000, real env return = -127.11


 74%|█████████████████████████▊         | 5911/8000 [4:36:35<1:23:20,  2.39s/it]

Episode 5911/8000, real env return = -40.83


 74%|█████████████████████████▉         | 5921/8000 [4:37:02<1:37:21,  2.81s/it]

Episode 5921/8000, real env return = -33.87


 74%|█████████████████████████▉         | 5931/8000 [4:37:27<1:20:08,  2.32s/it]

Episode 5931/8000, real env return = -107.96


 74%|█████████████████████████▉         | 5941/8000 [4:37:51<1:15:07,  2.19s/it]

Episode 5941/8000, real env return = -34.59


 74%|██████████████████████████         | 5951/8000 [4:38:19<1:27:27,  2.56s/it]

Episode 5951/8000, real env return = -94.60


 75%|██████████████████████████         | 5961/8000 [4:38:46<1:30:05,  2.65s/it]

Episode 5961/8000, real env return = -91.36


 75%|██████████████████████████         | 5971/8000 [4:39:14<1:31:00,  2.69s/it]

Episode 5971/8000, real env return = -40.97


 75%|██████████████████████████▏        | 5981/8000 [4:39:45<1:42:15,  3.04s/it]

Episode 5981/8000, real env return = -23.62


 75%|██████████████████████████▏        | 5991/8000 [4:40:13<1:33:20,  2.79s/it]

Episode 5991/8000, real env return = -84.66


 75%|██████████████████████████▎        | 6001/8000 [4:40:38<1:20:51,  2.43s/it]

Episode 6001/8000, real env return = -48.59


 75%|██████████████████████████▎        | 6011/8000 [4:41:05<1:31:33,  2.76s/it]

Episode 6011/8000, real env return = -38.13


 75%|██████████████████████████▎        | 6021/8000 [4:41:33<1:32:20,  2.80s/it]

Episode 6021/8000, real env return = -25.94


 75%|██████████████████████████▍        | 6031/8000 [4:42:02<1:37:33,  2.97s/it]

Episode 6031/8000, real env return = -25.35


 76%|██████████████████████████▍        | 6041/8000 [4:42:29<1:35:29,  2.92s/it]

Episode 6041/8000, real env return = -28.92


 76%|██████████████████████████▍        | 6051/8000 [4:42:56<1:25:15,  2.62s/it]

Episode 6051/8000, real env return = -35.15


 76%|██████████████████████████▌        | 6061/8000 [4:43:20<1:12:00,  2.23s/it]

Episode 6061/8000, real env return = -18.95


 76%|██████████████████████████▌        | 6071/8000 [4:43:48<1:31:26,  2.84s/it]

Episode 6071/8000, real env return = -38.02


 76%|██████████████████████████▌        | 6081/8000 [4:44:15<1:26:12,  2.70s/it]

Episode 6081/8000, real env return = -25.95


 76%|██████████████████████████▋        | 6091/8000 [4:44:42<1:22:59,  2.61s/it]

Episode 6091/8000, real env return = -42.40


 76%|██████████████████████████▋        | 6101/8000 [4:45:05<1:11:58,  2.27s/it]

Episode 6101/8000, real env return = -26.46


 76%|██████████████████████████▋        | 6111/8000 [4:45:32<1:24:42,  2.69s/it]

Episode 6111/8000, real env return = -25.16


 77%|██████████████████████████▊        | 6121/8000 [4:46:00<1:30:30,  2.89s/it]

Episode 6121/8000, real env return = -39.89


 77%|██████████████████████████▊        | 6131/8000 [4:46:28<1:29:16,  2.87s/it]

Episode 6131/8000, real env return = -41.39


 77%|██████████████████████████▊        | 6141/8000 [4:46:57<1:34:08,  3.04s/it]

Episode 6141/8000, real env return = -27.02


 77%|██████████████████████████▉        | 6151/8000 [4:47:24<1:20:01,  2.60s/it]

Episode 6151/8000, real env return = -31.72


 77%|██████████████████████████▉        | 6161/8000 [4:47:51<1:25:14,  2.78s/it]

Episode 6161/8000, real env return = -25.95


 77%|██████████████████████████▉        | 6171/8000 [4:48:18<1:25:12,  2.80s/it]

Episode 6171/8000, real env return = -37.31


 77%|███████████████████████████        | 6181/8000 [4:48:47<1:25:12,  2.81s/it]

Episode 6181/8000, real env return = -44.59


 77%|███████████████████████████        | 6191/8000 [4:49:14<1:21:42,  2.71s/it]

Episode 6191/8000, real env return = -36.95


 78%|███████████████████████████▏       | 6201/8000 [4:49:39<1:05:58,  2.20s/it]

Episode 6201/8000, real env return = -38.30


 78%|███████████████████████████▏       | 6211/8000 [4:50:04<1:08:26,  2.30s/it]

Episode 6211/8000, real env return = -44.89


 78%|███████████████████████████▏       | 6221/8000 [4:50:31<1:22:17,  2.78s/it]

Episode 6221/8000, real env return = -37.64


 78%|███████████████████████████▎       | 6231/8000 [4:50:57<1:18:11,  2.65s/it]

Episode 6231/8000, real env return = -122.06


 78%|███████████████████████████▎       | 6241/8000 [4:51:26<1:32:50,  3.17s/it]

Episode 6241/8000, real env return = -33.27


 78%|███████████████████████████▎       | 6251/8000 [4:51:51<1:10:15,  2.41s/it]

Episode 6251/8000, real env return = -35.32


 78%|███████████████████████████▍       | 6261/8000 [4:52:19<1:20:15,  2.77s/it]

Episode 6261/8000, real env return = -34.75


 78%|███████████████████████████▍       | 6271/8000 [4:52:46<1:05:55,  2.29s/it]

Episode 6271/8000, real env return = -32.25


 79%|███████████████████████████▍       | 6281/8000 [4:53:13<1:19:03,  2.76s/it]

Episode 6281/8000, real env return = -36.09


 79%|███████████████████████████▌       | 6291/8000 [4:53:43<1:29:08,  3.13s/it]

Episode 6291/8000, real env return = -33.67


 79%|███████████████████████████▌       | 6301/8000 [4:54:13<1:20:51,  2.86s/it]

Episode 6301/8000, real env return = -58.81


 79%|███████████████████████████▌       | 6311/8000 [4:54:42<1:24:22,  3.00s/it]

Episode 6311/8000, real env return = -33.57


 79%|███████████████████████████▋       | 6321/8000 [4:55:09<1:14:42,  2.67s/it]

Episode 6321/8000, real env return = -34.32


 79%|███████████████████████████▋       | 6331/8000 [4:55:37<1:20:33,  2.90s/it]

Episode 6331/8000, real env return = -49.30


 79%|███████████████████████████▋       | 6341/8000 [4:56:07<1:20:51,  2.92s/it]

Episode 6341/8000, real env return = -36.27


 79%|███████████████████████████▊       | 6351/8000 [4:56:33<1:11:15,  2.59s/it]

Episode 6351/8000, real env return = -32.42


 80%|███████████████████████████▊       | 6361/8000 [4:56:58<1:13:34,  2.69s/it]

Episode 6361/8000, real env return = -35.84


 80%|███████████████████████████▊       | 6371/8000 [4:57:26<1:14:25,  2.74s/it]

Episode 6371/8000, real env return = -32.20


 80%|███████████████████████████▉       | 6381/8000 [4:57:55<1:12:43,  2.70s/it]

Episode 6381/8000, real env return = -34.80


 80%|███████████████████████████▉       | 6391/8000 [4:58:23<1:19:53,  2.98s/it]

Episode 6391/8000, real env return = -42.92


 80%|████████████████████████████       | 6401/8000 [4:58:51<1:13:54,  2.77s/it]

Episode 6401/8000, real env return = -70.40


 80%|████████████████████████████       | 6411/8000 [4:59:20<1:19:15,  2.99s/it]

Episode 6411/8000, real env return = -38.52


 80%|████████████████████████████       | 6421/8000 [4:59:52<1:23:16,  3.16s/it]

Episode 6421/8000, real env return = -30.63


 80%|████████████████████████████▏      | 6431/8000 [5:00:22<1:22:40,  3.16s/it]

Episode 6431/8000, real env return = -34.87


 81%|████████████████████████████▏      | 6441/8000 [5:00:48<1:10:06,  2.70s/it]

Episode 6441/8000, real env return = -25.43


 81%|████████████████████████████▏      | 6451/8000 [5:01:16<1:19:34,  3.08s/it]

Episode 6451/8000, real env return = -31.23


 81%|████████████████████████████▎      | 6461/8000 [5:01:45<1:16:22,  2.98s/it]

Episode 6461/8000, real env return = -31.99


 81%|████████████████████████████▎      | 6471/8000 [5:02:13<1:10:49,  2.78s/it]

Episode 6471/8000, real env return = -41.23


 81%|████████████████████████████▎      | 6481/8000 [5:02:40<1:10:45,  2.80s/it]

Episode 6481/8000, real env return = -28.64


 81%|████████████████████████████▍      | 6491/8000 [5:03:11<1:18:32,  3.12s/it]

Episode 6491/8000, real env return = -40.31


 81%|████████████████████████████▍      | 6501/8000 [5:03:41<1:17:51,  3.12s/it]

Episode 6501/8000, real env return = -34.60


 81%|████████████████████████████▍      | 6511/8000 [5:04:09<1:09:51,  2.82s/it]

Episode 6511/8000, real env return = -42.31


 82%|████████████████████████████▌      | 6521/8000 [5:04:36<1:01:32,  2.50s/it]

Episode 6521/8000, real env return = -35.86


 82%|████████████████████████████▌      | 6531/8000 [5:05:03<1:04:17,  2.63s/it]

Episode 6531/8000, real env return = -36.19


 82%|████████████████████████████▌      | 6541/8000 [5:05:29<1:04:39,  2.66s/it]

Episode 6541/8000, real env return = -69.04


 82%|████████████████████████████▋      | 6551/8000 [5:05:58<1:11:42,  2.97s/it]

Episode 6551/8000, real env return = -47.40


 82%|████████████████████████████▋      | 6561/8000 [5:06:25<1:03:02,  2.63s/it]

Episode 6561/8000, real env return = -30.74


 82%|████████████████████████████▋      | 6571/8000 [5:06:50<1:08:52,  2.89s/it]

Episode 6571/8000, real env return = -47.40


 82%|████████████████████████████▊      | 6581/8000 [5:07:21<1:10:41,  2.99s/it]

Episode 6581/8000, real env return = -53.96


 82%|████████████████████████████▊      | 6591/8000 [5:07:52<1:09:08,  2.94s/it]

Episode 6591/8000, real env return = -52.01


 83%|████████████████████████████▉      | 6601/8000 [5:08:18<1:06:41,  2.86s/it]

Episode 6601/8000, real env return = -47.37


 83%|████████████████████████████▉      | 6611/8000 [5:08:45<1:04:45,  2.80s/it]

Episode 6611/8000, real env return = -49.29


 83%|██████████████████████████████▌      | 6621/8000 [5:09:12<58:23,  2.54s/it]

Episode 6621/8000, real env return = -43.20


 83%|█████████████████████████████      | 6631/8000 [5:09:41<1:03:27,  2.78s/it]

Episode 6631/8000, real env return = -43.87


 83%|█████████████████████████████      | 6641/8000 [5:10:10<1:05:34,  2.90s/it]

Episode 6641/8000, real env return = -39.10


 83%|█████████████████████████████      | 6651/8000 [5:10:35<1:00:34,  2.69s/it]

Episode 6651/8000, real env return = -41.44


 83%|█████████████████████████████▏     | 6661/8000 [5:11:02<1:03:41,  2.85s/it]

Episode 6661/8000, real env return = -30.00


 83%|█████████████████████████████▏     | 6671/8000 [5:11:31<1:04:44,  2.92s/it]

Episode 6671/8000, real env return = -28.44


 84%|█████████████████████████████▏     | 6681/8000 [5:11:58<1:05:07,  2.96s/it]

Episode 6681/8000, real env return = -31.73


 84%|██████████████████████████████▉      | 6691/8000 [5:12:24<50:13,  2.30s/it]

Episode 6691/8000, real env return = -40.50


 84%|██████████████████████████████▉      | 6701/8000 [5:12:50<54:34,  2.52s/it]

Episode 6701/8000, real env return = -46.73


 84%|█████████████████████████████▎     | 6711/8000 [5:13:18<1:02:27,  2.91s/it]

Episode 6711/8000, real env return = -30.63


 84%|█████████████████████████████▍     | 6721/8000 [5:13:44<1:03:10,  2.96s/it]

Episode 6721/8000, real env return = -33.82


 84%|███████████████████████████████▏     | 6731/8000 [5:14:13<55:06,  2.61s/it]

Episode 6731/8000, real env return = -41.59


 84%|███████████████████████████████▏     | 6741/8000 [5:14:42<56:26,  2.69s/it]

Episode 6741/8000, real env return = -31.42


 84%|███████████████████████████████▏     | 6751/8000 [5:15:11<59:20,  2.85s/it]

Episode 6751/8000, real env return = -30.17


 85%|███████████████████████████████▎     | 6761/8000 [5:15:40<56:10,  2.72s/it]

Episode 6761/8000, real env return = -43.64


 85%|███████████████████████████████▎     | 6771/8000 [5:16:07<56:04,  2.74s/it]

Episode 6771/8000, real env return = -45.30


 85%|█████████████████████████████▋     | 6781/8000 [5:16:37<1:00:30,  2.98s/it]

Episode 6781/8000, real env return = -26.28


 85%|███████████████████████████████▍     | 6791/8000 [5:17:05<55:57,  2.78s/it]

Episode 6791/8000, real env return = -37.27


 85%|█████████████████████████████▊     | 6801/8000 [5:17:36<1:01:54,  3.10s/it]

Episode 6801/8000, real env return = -27.90


 85%|███████████████████████████████▌     | 6811/8000 [5:18:05<57:08,  2.88s/it]

Episode 6811/8000, real env return = -30.24


 85%|███████████████████████████████▌     | 6821/8000 [5:18:35<55:39,  2.83s/it]

Episode 6821/8000, real env return = -21.43


 85%|███████████████████████████████▌     | 6831/8000 [5:19:04<56:13,  2.89s/it]

Episode 6831/8000, real env return = -43.75


 86%|███████████████████████████████▋     | 6841/8000 [5:19:32<55:31,  2.87s/it]

Episode 6841/8000, real env return = -52.85


 86%|███████████████████████████████▋     | 6851/8000 [5:20:02<53:37,  2.80s/it]

Episode 6851/8000, real env return = -47.95


 86%|███████████████████████████████▋     | 6861/8000 [5:20:32<57:46,  3.04s/it]

Episode 6861/8000, real env return = -60.09


 86%|███████████████████████████████▊     | 6871/8000 [5:20:59<44:21,  2.36s/it]

Episode 6871/8000, real env return = -40.31


 86%|███████████████████████████████▊     | 6881/8000 [5:21:26<47:38,  2.55s/it]

Episode 6881/8000, real env return = -30.72


 86%|███████████████████████████████▊     | 6891/8000 [5:21:54<54:23,  2.94s/it]

Episode 6891/8000, real env return = -32.78


 86%|███████████████████████████████▉     | 6901/8000 [5:22:19<50:06,  2.74s/it]

Episode 6901/8000, real env return = -42.09


 86%|███████████████████████████████▉     | 6911/8000 [5:22:48<50:35,  2.79s/it]

Episode 6911/8000, real env return = -31.47


 87%|████████████████████████████████     | 6921/8000 [5:23:16<51:49,  2.88s/it]

Episode 6921/8000, real env return = -31.22


 87%|████████████████████████████████     | 6931/8000 [5:23:44<45:57,  2.58s/it]

Episode 6931/8000, real env return = -44.35


 87%|████████████████████████████████     | 6941/8000 [5:24:14<51:56,  2.94s/it]

Episode 6941/8000, real env return = -84.59


 87%|████████████████████████████████▏    | 6951/8000 [5:24:42<47:24,  2.71s/it]

Episode 6951/8000, real env return = -32.84


 87%|████████████████████████████████▏    | 6961/8000 [5:25:09<45:43,  2.64s/it]

Episode 6961/8000, real env return = -98.15


 87%|████████████████████████████████▏    | 6971/8000 [5:25:36<46:39,  2.72s/it]

Episode 6971/8000, real env return = -99.48


 87%|████████████████████████████████▎    | 6981/8000 [5:26:02<38:58,  2.30s/it]

Episode 6981/8000, real env return = -132.55


 87%|████████████████████████████████▎    | 6991/8000 [5:26:30<46:48,  2.78s/it]

Episode 6991/8000, real env return = -45.94


 88%|████████████████████████████████▍    | 7001/8000 [5:26:53<34:02,  2.04s/it]

Episode 7001/8000, real env return = -97.12


 88%|████████████████████████████████▍    | 7011/8000 [5:27:19<47:17,  2.87s/it]

Episode 7011/8000, real env return = -27.25


 88%|████████████████████████████████▍    | 7021/8000 [5:27:47<45:10,  2.77s/it]

Episode 7021/8000, real env return = -33.00


 88%|████████████████████████████████▌    | 7031/8000 [5:28:14<45:10,  2.80s/it]

Episode 7031/8000, real env return = -42.79


 88%|████████████████████████████████▌    | 7041/8000 [5:28:44<48:25,  3.03s/it]

Episode 7041/8000, real env return = -41.17


 88%|████████████████████████████████▌    | 7051/8000 [5:29:13<45:44,  2.89s/it]

Episode 7051/8000, real env return = -57.91


 88%|████████████████████████████████▋    | 7061/8000 [5:29:44<48:18,  3.09s/it]

Episode 7061/8000, real env return = -40.13


 88%|████████████████████████████████▋    | 7071/8000 [5:30:11<42:47,  2.76s/it]

Episode 7071/8000, real env return = -35.29


 89%|████████████████████████████████▋    | 7081/8000 [5:30:37<38:36,  2.52s/it]

Episode 7081/8000, real env return = -26.01


 89%|████████████████████████████████▊    | 7091/8000 [5:31:05<44:33,  2.94s/it]

Episode 7091/8000, real env return = -26.03


 89%|████████████████████████████████▊    | 7101/8000 [5:31:34<41:33,  2.77s/it]

Episode 7101/8000, real env return = -23.40


 89%|████████████████████████████████▉    | 7111/8000 [5:32:01<38:54,  2.63s/it]

Episode 7111/8000, real env return = -28.96


 89%|████████████████████████████████▉    | 7121/8000 [5:32:27<38:02,  2.60s/it]

Episode 7121/8000, real env return = -18.49


 89%|████████████████████████████████▉    | 7131/8000 [5:32:54<39:14,  2.71s/it]

Episode 7131/8000, real env return = -38.51


 89%|█████████████████████████████████    | 7141/8000 [5:33:21<35:31,  2.48s/it]

Episode 7141/8000, real env return = -24.45


 89%|█████████████████████████████████    | 7151/8000 [5:33:51<39:08,  2.77s/it]

Episode 7151/8000, real env return = -29.23


 90%|█████████████████████████████████    | 7161/8000 [5:34:17<36:35,  2.62s/it]

Episode 7161/8000, real env return = -23.79


 90%|█████████████████████████████████▏   | 7171/8000 [5:34:45<41:33,  3.01s/it]

Episode 7171/8000, real env return = -17.59


 90%|█████████████████████████████████▏   | 7181/8000 [5:35:14<39:37,  2.90s/it]

Episode 7181/8000, real env return = -40.99


 90%|█████████████████████████████████▎   | 7191/8000 [5:35:45<43:35,  3.23s/it]

Episode 7191/8000, real env return = -17.98


 90%|█████████████████████████████████▎   | 7201/8000 [5:36:15<41:23,  3.11s/it]

Episode 7201/8000, real env return = -47.02


 90%|█████████████████████████████████▎   | 7211/8000 [5:36:44<40:05,  3.05s/it]

Episode 7211/8000, real env return = -21.55


 90%|█████████████████████████████████▍   | 7221/8000 [5:37:14<40:46,  3.14s/it]

Episode 7221/8000, real env return = -29.45


 90%|█████████████████████████████████▍   | 7231/8000 [5:37:43<36:57,  2.88s/it]

Episode 7231/8000, real env return = -19.54


 91%|█████████████████████████████████▍   | 7241/8000 [5:38:12<34:16,  2.71s/it]

Episode 7241/8000, real env return = -25.90


 91%|█████████████████████████████████▌   | 7251/8000 [5:38:39<35:07,  2.81s/it]

Episode 7251/8000, real env return = -21.58


 91%|█████████████████████████████████▌   | 7261/8000 [5:39:08<35:40,  2.90s/it]

Episode 7261/8000, real env return = -43.13


 91%|█████████████████████████████████▋   | 7271/8000 [5:39:36<32:58,  2.71s/it]

Episode 7271/8000, real env return = -25.53


 91%|█████████████████████████████████▋   | 7281/8000 [5:40:04<35:18,  2.95s/it]

Episode 7281/8000, real env return = -38.06


 91%|█████████████████████████████████▋   | 7291/8000 [5:40:34<34:57,  2.96s/it]

Episode 7291/8000, real env return = -46.40


 91%|█████████████████████████████████▊   | 7301/8000 [5:41:03<34:26,  2.96s/it]

Episode 7301/8000, real env return = -109.25


 91%|█████████████████████████████████▊   | 7311/8000 [5:41:30<30:57,  2.70s/it]

Episode 7311/8000, real env return = -39.93


 92%|█████████████████████████████████▊   | 7321/8000 [5:41:59<31:58,  2.82s/it]

Episode 7321/8000, real env return = -28.98


 92%|█████████████████████████████████▉   | 7331/8000 [5:42:26<30:24,  2.73s/it]

Episode 7331/8000, real env return = -41.62


 92%|█████████████████████████████████▉   | 7341/8000 [5:42:55<32:47,  2.99s/it]

Episode 7341/8000, real env return = -27.47


 92%|█████████████████████████████████▉   | 7351/8000 [5:43:19<24:45,  2.29s/it]

Episode 7351/8000, real env return = -106.09


 92%|██████████████████████████████████   | 7361/8000 [5:43:45<27:06,  2.54s/it]

Episode 7361/8000, real env return = -33.65


 92%|██████████████████████████████████   | 7371/8000 [5:44:09<28:23,  2.71s/it]

Episode 7371/8000, real env return = -47.62


 92%|██████████████████████████████████▏  | 7381/8000 [5:44:32<24:09,  2.34s/it]

Episode 7381/8000, real env return = -121.48


 92%|██████████████████████████████████▏  | 7391/8000 [5:45:00<28:01,  2.76s/it]

Episode 7391/8000, real env return = -36.57


 93%|██████████████████████████████████▏  | 7401/8000 [5:45:27<26:39,  2.67s/it]

Episode 7401/8000, real env return = -37.85


 93%|██████████████████████████████████▎  | 7411/8000 [5:45:54<27:04,  2.76s/it]

Episode 7411/8000, real env return = -151.00


 93%|██████████████████████████████████▎  | 7421/8000 [5:46:23<27:13,  2.82s/it]

Episode 7421/8000, real env return = -50.39


 93%|██████████████████████████████████▎  | 7431/8000 [5:46:52<28:07,  2.97s/it]

Episode 7431/8000, real env return = -57.71


 93%|██████████████████████████████████▍  | 7441/8000 [5:47:22<30:19,  3.25s/it]

Episode 7441/8000, real env return = -111.09


 93%|██████████████████████████████████▍  | 7451/8000 [5:47:48<23:34,  2.58s/it]

Episode 7451/8000, real env return = -86.49


 93%|██████████████████████████████████▌  | 7461/8000 [5:48:15<23:28,  2.61s/it]

Episode 7461/8000, real env return = -52.49


 93%|██████████████████████████████████▌  | 7471/8000 [5:48:45<25:35,  2.90s/it]

Episode 7471/8000, real env return = -40.46


 94%|██████████████████████████████████▌  | 7481/8000 [5:49:15<26:47,  3.10s/it]

Episode 7481/8000, real env return = -26.95


 94%|██████████████████████████████████▋  | 7491/8000 [5:49:44<23:05,  2.72s/it]

Episode 7491/8000, real env return = -36.67


 94%|██████████████████████████████████▋  | 7501/8000 [5:50:11<20:55,  2.52s/it]

Episode 7501/8000, real env return = -29.82


 94%|██████████████████████████████████▋  | 7511/8000 [5:50:40<24:57,  3.06s/it]

Episode 7511/8000, real env return = -27.17


 94%|██████████████████████████████████▊  | 7521/8000 [5:51:08<23:38,  2.96s/it]

Episode 7521/8000, real env return = -24.90


 94%|██████████████████████████████████▊  | 7531/8000 [5:51:37<23:39,  3.03s/it]

Episode 7531/8000, real env return = -28.45


 94%|██████████████████████████████████▉  | 7541/8000 [5:52:04<19:59,  2.61s/it]

Episode 7541/8000, real env return = -25.19


 94%|██████████████████████████████████▉  | 7551/8000 [5:52:31<18:12,  2.43s/it]

Episode 7551/8000, real env return = -44.40


 95%|██████████████████████████████████▉  | 7561/8000 [5:53:00<19:51,  2.71s/it]

Episode 7561/8000, real env return = -25.56


 95%|███████████████████████████████████  | 7571/8000 [5:53:30<22:28,  3.14s/it]

Episode 7571/8000, real env return = -27.70


 95%|███████████████████████████████████  | 7581/8000 [5:54:00<21:38,  3.10s/it]

Episode 7581/8000, real env return = -27.92


 95%|███████████████████████████████████  | 7591/8000 [5:54:26<17:43,  2.60s/it]

Episode 7591/8000, real env return = -28.55


 95%|███████████████████████████████████▏ | 7601/8000 [5:54:56<19:04,  2.87s/it]

Episode 7601/8000, real env return = -25.92


 95%|███████████████████████████████████▏ | 7611/8000 [5:55:24<17:32,  2.71s/it]

Episode 7611/8000, real env return = -38.65


 95%|███████████████████████████████████▏ | 7621/8000 [5:55:52<17:11,  2.72s/it]

Episode 7621/8000, real env return = -24.22


 95%|███████████████████████████████████▎ | 7631/8000 [5:56:23<19:47,  3.22s/it]

Episode 7631/8000, real env return = -22.91


 96%|███████████████████████████████████▎ | 7641/8000 [5:56:50<16:24,  2.74s/it]

Episode 7641/8000, real env return = -21.28


 96%|███████████████████████████████████▍ | 7651/8000 [5:57:19<16:36,  2.85s/it]

Episode 7651/8000, real env return = -28.37


 96%|███████████████████████████████████▍ | 7661/8000 [5:57:48<15:51,  2.81s/it]

Episode 7661/8000, real env return = -42.98


 96%|███████████████████████████████████▍ | 7671/8000 [5:58:14<13:13,  2.41s/it]

Episode 7671/8000, real env return = -23.78


 96%|███████████████████████████████████▌ | 7681/8000 [5:58:42<13:22,  2.52s/it]

Episode 7681/8000, real env return = -24.33


 96%|███████████████████████████████████▌ | 7691/8000 [5:59:11<15:38,  3.04s/it]

Episode 7691/8000, real env return = -27.27


 96%|███████████████████████████████████▌ | 7701/8000 [5:59:39<12:19,  2.47s/it]

Episode 7701/8000, real env return = -26.26


 96%|███████████████████████████████████▋ | 7711/8000 [6:00:05<11:52,  2.47s/it]

Episode 7711/8000, real env return = -22.55


 97%|███████████████████████████████████▋ | 7721/8000 [6:00:35<13:59,  3.01s/it]

Episode 7721/8000, real env return = -34.80


 97%|███████████████████████████████████▊ | 7731/8000 [6:01:04<13:21,  2.98s/it]

Episode 7731/8000, real env return = -27.07


 97%|███████████████████████████████████▊ | 7741/8000 [6:01:31<10:51,  2.51s/it]

Episode 7741/8000, real env return = -41.00


 97%|███████████████████████████████████▊ | 7751/8000 [6:02:00<12:17,  2.96s/it]

Episode 7751/8000, real env return = -23.46


 97%|███████████████████████████████████▉ | 7761/8000 [6:02:28<11:05,  2.79s/it]

Episode 7761/8000, real env return = -24.28


 97%|███████████████████████████████████▉ | 7771/8000 [6:02:57<11:06,  2.91s/it]

Episode 7771/8000, real env return = -24.31


 97%|███████████████████████████████████▉ | 7781/8000 [6:03:25<10:25,  2.85s/it]

Episode 7781/8000, real env return = -24.61


 97%|████████████████████████████████████ | 7791/8000 [6:03:54<10:37,  3.05s/it]

Episode 7791/8000, real env return = -21.36


 98%|████████████████████████████████████ | 7801/8000 [6:04:20<08:21,  2.52s/it]

Episode 7801/8000, real env return = -28.73


 98%|████████████████████████████████████▏| 7811/8000 [6:04:47<08:04,  2.57s/it]

Episode 7811/8000, real env return = -33.29


 98%|████████████████████████████████████▏| 7821/8000 [6:05:17<09:17,  3.12s/it]

Episode 7821/8000, real env return = -32.82


 98%|████████████████████████████████████▏| 7831/8000 [6:05:45<08:34,  3.05s/it]

Episode 7831/8000, real env return = -24.01


 98%|████████████████████████████████████▎| 7841/8000 [6:06:14<08:24,  3.17s/it]

Episode 7841/8000, real env return = -25.81


 98%|████████████████████████████████████▎| 7851/8000 [6:06:42<06:28,  2.61s/it]

Episode 7851/8000, real env return = -32.31


 98%|████████████████████████████████████▎| 7861/8000 [6:07:10<06:25,  2.77s/it]

Episode 7861/8000, real env return = -26.68


 98%|████████████████████████████████████▍| 7871/8000 [6:07:38<05:40,  2.64s/it]

Episode 7871/8000, real env return = -23.06


 99%|████████████████████████████████████▍| 7881/8000 [6:08:04<04:46,  2.40s/it]

Episode 7881/8000, real env return = -21.44


 99%|████████████████████████████████████▍| 7891/8000 [6:08:31<04:34,  2.52s/it]

Episode 7891/8000, real env return = -23.84


 99%|████████████████████████████████████▌| 7901/8000 [6:09:00<04:33,  2.76s/it]

Episode 7901/8000, real env return = -27.01


 99%|████████████████████████████████████▌| 7911/8000 [6:09:29<04:12,  2.84s/it]

Episode 7911/8000, real env return = -28.28


 99%|████████████████████████████████████▋| 7921/8000 [6:09:57<03:39,  2.78s/it]

Episode 7921/8000, real env return = -22.68


 99%|████████████████████████████████████▋| 7931/8000 [6:10:26<03:07,  2.72s/it]

Episode 7931/8000, real env return = -17.59


 99%|████████████████████████████████████▋| 7941/8000 [6:10:56<02:48,  2.85s/it]

Episode 7941/8000, real env return = -17.52


 99%|████████████████████████████████████▊| 7951/8000 [6:11:23<02:14,  2.74s/it]

Episode 7951/8000, real env return = -28.53


100%|████████████████████████████████████▊| 7961/8000 [6:11:53<01:56,  3.00s/it]

Episode 7961/8000, real env return = -27.46


100%|████████████████████████████████████▊| 7971/8000 [6:12:20<01:28,  3.04s/it]

Episode 7971/8000, real env return = -28.93


100%|████████████████████████████████████▉| 7981/8000 [6:12:48<00:54,  2.86s/it]

Episode 7981/8000, real env return = -38.67


100%|████████████████████████████████████▉| 7991/8000 [6:13:17<00:25,  2.88s/it]

Episode 7991/8000, real env return = -17.18


100%|█████████████████████████████████████| 8000/8000 [6:13:39<00:00,  2.80s/it]


Training finished.


In [11]:
train4 = main()

Using cuda device


  0%|                                          | 2/8000 [00:00<08:38, 15.42it/s]

Episode 1/8000, real env return = -103.57


  0%|                                       | 11/8000 [00:08<2:37:37,  1.18s/it]

Episode 11/8000, real env return = -112.95


  0%|                                       | 21/8000 [00:27<4:10:46,  1.89s/it]

Episode 21/8000, real env return = -104.35


  0%|▏                                      | 31/8000 [00:42<3:57:43,  1.79s/it]

Episode 31/8000, real env return = -109.14


  1%|▏                                      | 41/8000 [01:00<3:37:18,  1.64s/it]

Episode 41/8000, real env return = -100.84


  1%|▏                                      | 51/8000 [01:15<3:21:32,  1.52s/it]

Episode 51/8000, real env return = -112.26


  1%|▎                                      | 61/8000 [01:30<3:20:12,  1.51s/it]

Episode 61/8000, real env return = -102.43


  1%|▎                                      | 71/8000 [01:42<2:41:48,  1.22s/it]

Episode 71/8000, real env return = -111.25


  1%|▍                                      | 81/8000 [01:55<2:32:42,  1.16s/it]

Episode 81/8000, real env return = -117.83


  1%|▍                                      | 91/8000 [02:09<3:09:20,  1.44s/it]

Episode 91/8000, real env return = -108.08


  1%|▍                                     | 101/8000 [02:23<2:57:31,  1.35s/it]

Episode 101/8000, real env return = -112.70


  1%|▌                                     | 111/8000 [02:41<4:03:39,  1.85s/it]

Episode 111/8000, real env return = -117.52


  2%|▌                                     | 121/8000 [03:00<3:41:18,  1.69s/it]

Episode 121/8000, real env return = -118.27


  2%|▌                                     | 131/8000 [03:20<4:35:48,  2.10s/it]

Episode 131/8000, real env return = -121.25


  2%|▋                                     | 141/8000 [03:39<4:27:37,  2.04s/it]

Episode 141/8000, real env return = -98.16


  2%|▋                                     | 151/8000 [03:59<3:52:15,  1.78s/it]

Episode 151/8000, real env return = -123.18


  2%|▊                                     | 161/8000 [04:18<4:51:48,  2.23s/it]

Episode 161/8000, real env return = -119.23


  2%|▊                                     | 171/8000 [04:40<4:34:56,  2.11s/it]

Episode 171/8000, real env return = -101.90


  2%|▊                                     | 181/8000 [05:05<5:40:41,  2.61s/it]

Episode 181/8000, real env return = -87.73


  2%|▉                                     | 191/8000 [05:29<5:04:19,  2.34s/it]

Episode 191/8000, real env return = -156.74


  3%|▉                                     | 201/8000 [05:54<5:30:49,  2.55s/it]

Episode 201/8000, real env return = -150.24


  3%|█                                     | 211/8000 [06:16<4:50:53,  2.24s/it]

Episode 211/8000, real env return = -129.03


  3%|█                                     | 221/8000 [06:39<4:51:17,  2.25s/it]

Episode 221/8000, real env return = -136.39


  3%|█                                     | 231/8000 [07:04<5:51:47,  2.72s/it]

Episode 231/8000, real env return = -160.57


  3%|█▏                                    | 241/8000 [07:29<5:27:52,  2.54s/it]

Episode 241/8000, real env return = -131.38


  3%|█▏                                    | 251/8000 [07:52<4:54:13,  2.28s/it]

Episode 251/8000, real env return = -154.22


  3%|█▏                                    | 261/8000 [08:17<4:40:31,  2.17s/it]

Episode 261/8000, real env return = -150.85


  3%|█▎                                    | 271/8000 [08:39<4:44:59,  2.21s/it]

Episode 271/8000, real env return = -153.64


  4%|█▎                                    | 281/8000 [09:00<4:39:14,  2.17s/it]

Episode 281/8000, real env return = -162.75


  4%|█▍                                    | 291/8000 [09:23<5:07:08,  2.39s/it]

Episode 291/8000, real env return = -149.06


  4%|█▍                                    | 301/8000 [09:49<5:06:10,  2.39s/it]

Episode 301/8000, real env return = -142.29


  4%|█▍                                    | 311/8000 [10:13<4:45:17,  2.23s/it]

Episode 311/8000, real env return = -83.91


  4%|█▌                                    | 321/8000 [10:40<5:45:24,  2.70s/it]

Episode 321/8000, real env return = -105.78


  4%|█▌                                    | 331/8000 [11:07<6:08:30,  2.88s/it]

Episode 331/8000, real env return = -89.83


  4%|█▌                                    | 341/8000 [11:32<5:41:29,  2.68s/it]

Episode 341/8000, real env return = -100.63


  4%|█▋                                    | 351/8000 [11:54<4:43:02,  2.22s/it]

Episode 351/8000, real env return = -104.04


  5%|█▋                                    | 361/8000 [12:19<5:30:53,  2.60s/it]

Episode 361/8000, real env return = -101.64


  5%|█▊                                    | 371/8000 [12:49<6:18:47,  2.98s/it]

Episode 371/8000, real env return = -81.39


  5%|█▊                                    | 381/8000 [13:15<5:52:45,  2.78s/it]

Episode 381/8000, real env return = -88.57


  5%|█▊                                    | 391/8000 [13:43<5:36:07,  2.65s/it]

Episode 391/8000, real env return = -89.07


  5%|█▉                                    | 401/8000 [14:12<5:48:40,  2.75s/it]

Episode 401/8000, real env return = -104.80


  5%|█▉                                    | 411/8000 [14:41<6:18:13,  2.99s/it]

Episode 411/8000, real env return = -102.07


  5%|█▉                                    | 421/8000 [15:09<5:53:53,  2.80s/it]

Episode 421/8000, real env return = -99.12


  5%|██                                    | 431/8000 [15:35<5:09:46,  2.46s/it]

Episode 431/8000, real env return = -97.02


  6%|██                                    | 441/8000 [16:03<5:55:12,  2.82s/it]

Episode 441/8000, real env return = -107.36


  6%|██▏                                   | 451/8000 [16:33<6:26:51,  3.07s/it]

Episode 451/8000, real env return = -123.95


  6%|██▏                                   | 461/8000 [17:00<6:10:42,  2.95s/it]

Episode 461/8000, real env return = -116.20


  6%|██▏                                   | 471/8000 [17:25<4:48:55,  2.30s/it]

Episode 471/8000, real env return = -118.28


  6%|██▎                                   | 481/8000 [17:46<4:36:15,  2.20s/it]

Episode 481/8000, real env return = -127.62


  6%|██▎                                   | 491/8000 [18:06<4:18:59,  2.07s/it]

Episode 491/8000, real env return = -121.77


  6%|██▍                                   | 501/8000 [18:31<5:36:35,  2.69s/it]

Episode 501/8000, real env return = -116.32


  6%|██▍                                   | 511/8000 [19:00<5:46:52,  2.78s/it]

Episode 511/8000, real env return = -101.85


  7%|██▍                                   | 521/8000 [19:31<6:56:52,  3.34s/it]

Episode 521/8000, real env return = -76.37


  7%|██▌                                   | 531/8000 [19:57<5:32:25,  2.67s/it]

Episode 531/8000, real env return = -91.46


  7%|██▌                                   | 541/8000 [20:25<5:41:05,  2.74s/it]

Episode 541/8000, real env return = -109.18


  7%|██▌                                   | 551/8000 [20:52<5:54:06,  2.85s/it]

Episode 551/8000, real env return = -97.62


  7%|██▋                                   | 561/8000 [21:20<5:34:00,  2.69s/it]

Episode 561/8000, real env return = -105.14


  7%|██▋                                   | 571/8000 [21:46<5:16:33,  2.56s/it]

Episode 571/8000, real env return = -101.47


  7%|██▊                                   | 581/8000 [22:14<5:12:52,  2.53s/it]

Episode 581/8000, real env return = -82.14


  7%|██▊                                   | 591/8000 [22:42<5:18:34,  2.58s/it]

Episode 591/8000, real env return = -82.49


  8%|██▊                                   | 601/8000 [23:12<6:16:43,  3.05s/it]

Episode 601/8000, real env return = -85.36


  8%|██▉                                   | 611/8000 [23:42<6:17:45,  3.07s/it]

Episode 611/8000, real env return = -70.21


  8%|██▉                                   | 621/8000 [24:08<5:17:12,  2.58s/it]

Episode 621/8000, real env return = -60.90


  8%|██▉                                   | 631/8000 [24:37<6:07:50,  3.00s/it]

Episode 631/8000, real env return = -67.57


  8%|███                                   | 641/8000 [25:05<5:10:50,  2.53s/it]

Episode 641/8000, real env return = -63.66


  8%|███                                   | 651/8000 [25:32<5:29:46,  2.69s/it]

Episode 651/8000, real env return = -65.77


  8%|███▏                                  | 661/8000 [26:01<5:44:39,  2.82s/it]

Episode 661/8000, real env return = -63.53


  8%|███▏                                  | 671/8000 [26:31<6:22:36,  3.13s/it]

Episode 671/8000, real env return = -59.37


  9%|███▏                                  | 681/8000 [27:01<6:12:27,  3.05s/it]

Episode 681/8000, real env return = -59.99


  9%|███▎                                  | 691/8000 [27:31<6:30:59,  3.21s/it]

Episode 691/8000, real env return = -79.29


  9%|███▎                                  | 701/8000 [28:01<5:57:56,  2.94s/it]

Episode 701/8000, real env return = -57.64


  9%|███▍                                  | 711/8000 [28:28<5:18:55,  2.63s/it]

Episode 711/8000, real env return = -51.79


  9%|███▍                                  | 721/8000 [28:56<5:22:25,  2.66s/it]

Episode 721/8000, real env return = -63.45


  9%|███▍                                  | 731/8000 [29:23<5:31:05,  2.73s/it]

Episode 731/8000, real env return = -53.35


  9%|███▌                                  | 741/8000 [29:50<5:39:36,  2.81s/it]

Episode 741/8000, real env return = -56.45


  9%|███▌                                  | 751/8000 [30:20<5:59:57,  2.98s/it]

Episode 751/8000, real env return = -83.16


 10%|███▌                                  | 761/8000 [30:49<5:43:47,  2.85s/it]

Episode 761/8000, real env return = -58.83


 10%|███▋                                  | 771/8000 [31:19<5:53:44,  2.94s/it]

Episode 771/8000, real env return = -56.87


 10%|███▋                                  | 781/8000 [31:49<5:55:04,  2.95s/it]

Episode 781/8000, real env return = -82.58


 10%|███▊                                  | 791/8000 [32:19<6:07:22,  3.06s/it]

Episode 791/8000, real env return = -51.71


 10%|███▊                                  | 801/8000 [32:49<6:12:21,  3.10s/it]

Episode 801/8000, real env return = -58.77


 10%|███▊                                  | 811/8000 [33:22<6:40:45,  3.34s/it]

Episode 811/8000, real env return = -59.34


 10%|███▉                                  | 821/8000 [33:53<5:50:52,  2.93s/it]

Episode 821/8000, real env return = -58.03


 10%|███▉                                  | 831/8000 [34:21<5:13:59,  2.63s/it]

Episode 831/8000, real env return = -67.50


 11%|███▉                                  | 841/8000 [34:51<5:44:29,  2.89s/it]

Episode 841/8000, real env return = -75.67


 11%|████                                  | 851/8000 [35:19<5:45:10,  2.90s/it]

Episode 851/8000, real env return = -59.31


 11%|████                                  | 861/8000 [35:48<5:54:15,  2.98s/it]

Episode 861/8000, real env return = -83.75


 11%|████▏                                 | 871/8000 [36:20<6:26:28,  3.25s/it]

Episode 871/8000, real env return = -78.93


 11%|████▏                                 | 881/8000 [36:49<5:49:50,  2.95s/it]

Episode 881/8000, real env return = -74.58


 11%|████▏                                 | 891/8000 [37:19<6:12:12,  3.14s/it]

Episode 891/8000, real env return = -68.09


 11%|████▎                                 | 901/8000 [37:47<5:43:53,  2.91s/it]

Episode 901/8000, real env return = -75.68


 11%|████▎                                 | 911/8000 [38:18<6:21:27,  3.23s/it]

Episode 911/8000, real env return = -79.46


 12%|████▎                                 | 921/8000 [38:48<6:01:00,  3.06s/it]

Episode 921/8000, real env return = -80.96


 12%|████▍                                 | 931/8000 [39:19<5:35:38,  2.85s/it]

Episode 931/8000, real env return = -98.28


 12%|████▍                                 | 941/8000 [39:51<6:05:47,  3.11s/it]

Episode 941/8000, real env return = -59.93


 12%|████▌                                 | 951/8000 [40:22<6:02:18,  3.08s/it]

Episode 951/8000, real env return = -60.81


 12%|████▌                                 | 961/8000 [40:53<6:20:15,  3.24s/it]

Episode 961/8000, real env return = -55.24


 12%|████▌                                 | 971/8000 [41:24<6:03:55,  3.11s/it]

Episode 971/8000, real env return = -71.38


 12%|████▋                                 | 981/8000 [41:55<6:26:39,  3.31s/it]

Episode 981/8000, real env return = -70.65


 12%|████▋                                 | 991/8000 [42:24<5:34:24,  2.86s/it]

Episode 991/8000, real env return = -60.95


 13%|████▋                                | 1001/8000 [42:55<5:55:26,  3.05s/it]

Episode 1001/8000, real env return = -57.06


 13%|████▋                                | 1011/8000 [43:25<5:21:10,  2.76s/it]

Episode 1011/8000, real env return = -52.18


 13%|████▋                                | 1021/8000 [43:55<5:43:28,  2.95s/it]

Episode 1021/8000, real env return = -58.33


 13%|████▊                                | 1031/8000 [44:26<6:14:03,  3.22s/it]

Episode 1031/8000, real env return = -65.50


 13%|████▊                                | 1041/8000 [44:54<5:33:05,  2.87s/it]

Episode 1041/8000, real env return = -55.91


 13%|████▊                                | 1051/8000 [45:24<5:50:15,  3.02s/it]

Episode 1051/8000, real env return = -56.61


 13%|████▉                                | 1061/8000 [45:57<6:25:59,  3.34s/it]

Episode 1061/8000, real env return = -53.92


 13%|████▉                                | 1071/8000 [46:26<5:42:32,  2.97s/it]

Episode 1071/8000, real env return = -58.74


 14%|████▉                                | 1081/8000 [46:59<6:26:35,  3.35s/it]

Episode 1081/8000, real env return = -45.50


 14%|█████                                | 1091/8000 [47:30<5:56:13,  3.09s/it]

Episode 1091/8000, real env return = -56.54


 14%|█████                                | 1101/8000 [48:01<5:14:09,  2.73s/it]

Episode 1101/8000, real env return = -56.57


 14%|█████▏                               | 1111/8000 [48:31<5:43:07,  2.99s/it]

Episode 1111/8000, real env return = -53.44


 14%|█████▏                               | 1121/8000 [49:00<5:35:42,  2.93s/it]

Episode 1121/8000, real env return = -69.25


 14%|█████▏                               | 1131/8000 [49:31<6:01:53,  3.16s/it]

Episode 1131/8000, real env return = -58.30


 14%|█████▎                               | 1141/8000 [50:05<6:31:23,  3.42s/it]

Episode 1141/8000, real env return = -66.75


 14%|█████▎                               | 1151/8000 [50:36<5:57:04,  3.13s/it]

Episode 1151/8000, real env return = -76.45


 15%|█████▎                               | 1161/8000 [51:05<5:33:59,  2.93s/it]

Episode 1161/8000, real env return = -109.29


 15%|█████▍                               | 1171/8000 [51:35<5:46:32,  3.04s/it]

Episode 1171/8000, real env return = -49.62


 15%|█████▍                               | 1181/8000 [52:06<6:07:52,  3.24s/it]

Episode 1181/8000, real env return = -81.13


 15%|█████▌                               | 1191/8000 [52:37<5:48:17,  3.07s/it]

Episode 1191/8000, real env return = -69.86


 15%|█████▌                               | 1201/8000 [53:09<5:56:28,  3.15s/it]

Episode 1201/8000, real env return = -74.44


 15%|█████▌                               | 1211/8000 [53:41<6:20:07,  3.36s/it]

Episode 1211/8000, real env return = -63.08


 15%|█████▋                               | 1221/8000 [54:15<6:04:03,  3.22s/it]

Episode 1221/8000, real env return = -60.61


 15%|█████▋                               | 1231/8000 [54:46<5:54:31,  3.14s/it]

Episode 1231/8000, real env return = -64.22


 16%|█████▋                               | 1241/8000 [55:18<6:14:59,  3.33s/it]

Episode 1241/8000, real env return = -74.97


 16%|█████▊                               | 1251/8000 [55:50<5:49:27,  3.11s/it]

Episode 1251/8000, real env return = -72.15


 16%|█████▊                               | 1261/8000 [56:22<5:57:23,  3.18s/it]

Episode 1261/8000, real env return = -68.54


 16%|█████▉                               | 1271/8000 [56:52<5:37:26,  3.01s/it]

Episode 1271/8000, real env return = -81.43


 16%|█████▉                               | 1281/8000 [57:23<5:49:36,  3.12s/it]

Episode 1281/8000, real env return = -42.90


 16%|█████▉                               | 1291/8000 [57:51<5:02:25,  2.70s/it]

Episode 1291/8000, real env return = -59.63


 16%|██████                               | 1301/8000 [58:21<5:51:42,  3.15s/it]

Episode 1301/8000, real env return = -73.72


 16%|██████                               | 1311/8000 [58:50<5:33:32,  2.99s/it]

Episode 1311/8000, real env return = -68.72


 17%|██████                               | 1321/8000 [59:20<5:15:28,  2.83s/it]

Episode 1321/8000, real env return = -66.06


 17%|██████▏                              | 1331/8000 [59:51<5:50:54,  3.16s/it]

Episode 1331/8000, real env return = -74.24


 17%|█████▊                             | 1341/8000 [1:00:21<5:50:36,  3.16s/it]

Episode 1341/8000, real env return = -50.81


 17%|█████▉                             | 1351/8000 [1:00:53<5:31:42,  2.99s/it]

Episode 1351/8000, real env return = -30.40


 17%|█████▉                             | 1361/8000 [1:01:24<5:38:35,  3.06s/it]

Episode 1361/8000, real env return = -33.82


 17%|█████▉                             | 1371/8000 [1:01:54<4:56:51,  2.69s/it]

Episode 1371/8000, real env return = -53.06


 17%|██████                             | 1381/8000 [1:02:23<5:18:16,  2.89s/it]

Episode 1381/8000, real env return = -62.23


 17%|██████                             | 1391/8000 [1:02:54<5:43:17,  3.12s/it]

Episode 1391/8000, real env return = -48.63


 18%|██████▏                            | 1401/8000 [1:03:26<5:55:58,  3.24s/it]

Episode 1401/8000, real env return = -37.59


 18%|██████▏                            | 1411/8000 [1:03:56<5:25:37,  2.97s/it]

Episode 1411/8000, real env return = -57.50


 18%|██████▏                            | 1421/8000 [1:04:26<5:38:29,  3.09s/it]

Episode 1421/8000, real env return = -50.92


 18%|██████▎                            | 1431/8000 [1:04:58<5:42:03,  3.12s/it]

Episode 1431/8000, real env return = -31.44


 18%|██████▎                            | 1441/8000 [1:05:29<5:39:56,  3.11s/it]

Episode 1441/8000, real env return = -54.35


 18%|██████▎                            | 1451/8000 [1:05:59<5:46:40,  3.18s/it]

Episode 1451/8000, real env return = -38.36


 18%|██████▍                            | 1461/8000 [1:06:30<5:43:48,  3.15s/it]

Episode 1461/8000, real env return = -48.23


 18%|██████▍                            | 1471/8000 [1:07:02<5:54:02,  3.25s/it]

Episode 1471/8000, real env return = -33.83


 19%|██████▍                            | 1481/8000 [1:07:32<5:03:55,  2.80s/it]

Episode 1481/8000, real env return = -43.95


 19%|██████▌                            | 1491/8000 [1:08:01<5:27:01,  3.01s/it]

Episode 1491/8000, real env return = -39.35


 19%|██████▌                            | 1501/8000 [1:08:31<5:01:19,  2.78s/it]

Episode 1501/8000, real env return = -34.79


 19%|██████▌                            | 1511/8000 [1:08:59<5:10:18,  2.87s/it]

Episode 1511/8000, real env return = -36.49


 19%|██████▋                            | 1521/8000 [1:09:29<5:38:00,  3.13s/it]

Episode 1521/8000, real env return = -32.69


 19%|██████▋                            | 1531/8000 [1:09:59<5:32:36,  3.09s/it]

Episode 1531/8000, real env return = -30.93


 19%|██████▋                            | 1541/8000 [1:10:29<5:17:57,  2.95s/it]

Episode 1541/8000, real env return = -42.60


 19%|██████▊                            | 1551/8000 [1:10:58<5:26:47,  3.04s/it]

Episode 1551/8000, real env return = -36.33


 20%|██████▊                            | 1561/8000 [1:11:28<5:30:57,  3.08s/it]

Episode 1561/8000, real env return = -33.82


 20%|██████▊                            | 1571/8000 [1:11:59<5:36:31,  3.14s/it]

Episode 1571/8000, real env return = -36.17


 20%|██████▉                            | 1581/8000 [1:12:30<5:37:35,  3.16s/it]

Episode 1581/8000, real env return = -42.37


 20%|██████▉                            | 1591/8000 [1:13:01<5:36:48,  3.15s/it]

Episode 1591/8000, real env return = -35.17


 20%|███████                            | 1601/8000 [1:13:32<5:11:52,  2.92s/it]

Episode 1601/8000, real env return = -43.31


 20%|███████                            | 1611/8000 [1:14:04<5:46:39,  3.26s/it]

Episode 1611/8000, real env return = -31.74


 20%|███████                            | 1621/8000 [1:14:35<5:31:09,  3.11s/it]

Episode 1621/8000, real env return = -26.91


 20%|███████▏                           | 1631/8000 [1:15:08<5:41:27,  3.22s/it]

Episode 1631/8000, real env return = -50.56


 21%|███████▏                           | 1641/8000 [1:15:38<5:12:07,  2.95s/it]

Episode 1641/8000, real env return = -56.53


 21%|███████▏                           | 1651/8000 [1:16:08<5:26:54,  3.09s/it]

Episode 1651/8000, real env return = -43.00


 21%|███████▎                           | 1661/8000 [1:16:40<5:40:53,  3.23s/it]

Episode 1661/8000, real env return = -29.21


 21%|███████▎                           | 1671/8000 [1:17:11<5:28:29,  3.11s/it]

Episode 1671/8000, real env return = -32.31


 21%|███████▎                           | 1681/8000 [1:17:43<5:32:34,  3.16s/it]

Episode 1681/8000, real env return = -32.48


 21%|███████▍                           | 1691/8000 [1:18:14<5:23:22,  3.08s/it]

Episode 1691/8000, real env return = -36.67


 21%|███████▍                           | 1701/8000 [1:18:45<4:58:28,  2.84s/it]

Episode 1701/8000, real env return = -107.38


 21%|███████▍                           | 1711/8000 [1:19:17<5:04:13,  2.90s/it]

Episode 1711/8000, real env return = -31.25


 22%|███████▌                           | 1721/8000 [1:19:48<5:21:04,  3.07s/it]

Episode 1721/8000, real env return = -38.50


 22%|███████▌                           | 1731/8000 [1:20:18<5:08:28,  2.95s/it]

Episode 1731/8000, real env return = -39.62


 22%|███████▌                           | 1741/8000 [1:20:49<5:23:51,  3.10s/it]

Episode 1741/8000, real env return = -40.72


 22%|███████▋                           | 1751/8000 [1:21:20<5:26:11,  3.13s/it]

Episode 1751/8000, real env return = -37.28


 22%|███████▋                           | 1761/8000 [1:21:52<5:40:39,  3.28s/it]

Episode 1761/8000, real env return = -39.01


 22%|███████▋                           | 1771/8000 [1:22:24<5:40:11,  3.28s/it]

Episode 1771/8000, real env return = -39.73


 22%|███████▊                           | 1781/8000 [1:22:57<5:33:08,  3.21s/it]

Episode 1781/8000, real env return = -29.63


 22%|███████▊                           | 1791/8000 [1:23:28<5:36:58,  3.26s/it]

Episode 1791/8000, real env return = -39.29


 23%|███████▉                           | 1801/8000 [1:23:59<5:16:36,  3.06s/it]

Episode 1801/8000, real env return = -29.97


 23%|███████▉                           | 1811/8000 [1:24:32<5:34:03,  3.24s/it]

Episode 1811/8000, real env return = -32.82


 23%|███████▉                           | 1821/8000 [1:25:03<5:19:21,  3.10s/it]

Episode 1821/8000, real env return = -40.19


 23%|████████                           | 1831/8000 [1:25:36<5:42:34,  3.33s/it]

Episode 1831/8000, real env return = -36.25


 23%|████████                           | 1841/8000 [1:26:07<5:20:31,  3.12s/it]

Episode 1841/8000, real env return = -38.06


 23%|████████                           | 1851/8000 [1:26:40<5:21:52,  3.14s/it]

Episode 1851/8000, real env return = -35.82


 23%|████████▏                          | 1861/8000 [1:27:13<5:21:42,  3.14s/it]

Episode 1861/8000, real env return = -35.43


 23%|████████▏                          | 1871/8000 [1:27:46<5:31:51,  3.25s/it]

Episode 1871/8000, real env return = -34.85


 24%|████████▏                          | 1881/8000 [1:28:19<5:36:05,  3.30s/it]

Episode 1881/8000, real env return = -30.86


 24%|████████▎                          | 1891/8000 [1:28:51<5:20:24,  3.15s/it]

Episode 1891/8000, real env return = -32.22


 24%|████████▎                          | 1901/8000 [1:29:24<5:49:29,  3.44s/it]

Episode 1901/8000, real env return = -29.89


 24%|████████▎                          | 1911/8000 [1:29:59<6:06:05,  3.61s/it]

Episode 1911/8000, real env return = -26.62


 24%|████████▍                          | 1921/8000 [1:30:33<5:36:18,  3.32s/it]

Episode 1921/8000, real env return = -38.00


 24%|████████▍                          | 1931/8000 [1:31:05<5:14:18,  3.11s/it]

Episode 1931/8000, real env return = -27.96


 24%|████████▍                          | 1941/8000 [1:31:37<5:20:46,  3.18s/it]

Episode 1941/8000, real env return = -32.97


 24%|████████▌                          | 1951/8000 [1:32:08<5:18:19,  3.16s/it]

Episode 1951/8000, real env return = -43.37


 25%|████████▌                          | 1961/8000 [1:32:39<5:06:36,  3.05s/it]

Episode 1961/8000, real env return = -32.30


 25%|████████▌                          | 1971/8000 [1:33:12<5:28:00,  3.26s/it]

Episode 1971/8000, real env return = -27.19


 25%|████████▋                          | 1981/8000 [1:33:41<4:56:53,  2.96s/it]

Episode 1981/8000, real env return = -34.20


 25%|████████▋                          | 1991/8000 [1:34:12<4:48:41,  2.88s/it]

Episode 1991/8000, real env return = -36.83


 25%|████████▊                          | 2001/8000 [1:34:43<5:11:54,  3.12s/it]

Episode 2001/8000, real env return = -29.61


 25%|████████▊                          | 2011/8000 [1:35:14<5:18:32,  3.19s/it]

Episode 2011/8000, real env return = -28.70


 25%|████████▊                          | 2021/8000 [1:35:46<5:31:01,  3.32s/it]

Episode 2021/8000, real env return = -37.77


 25%|████████▉                          | 2031/8000 [1:36:16<5:16:27,  3.18s/it]

Episode 2031/8000, real env return = -25.32


 26%|████████▉                          | 2041/8000 [1:36:48<5:12:55,  3.15s/it]

Episode 2041/8000, real env return = -26.53


 26%|████████▉                          | 2051/8000 [1:37:19<4:53:55,  2.96s/it]

Episode 2051/8000, real env return = -28.59


 26%|█████████                          | 2061/8000 [1:37:50<5:10:20,  3.14s/it]

Episode 2061/8000, real env return = -26.68


 26%|█████████                          | 2071/8000 [1:38:21<5:10:20,  3.14s/it]

Episode 2071/8000, real env return = -39.58


 26%|█████████                          | 2081/8000 [1:38:51<4:59:24,  3.04s/it]

Episode 2081/8000, real env return = -31.02


 26%|█████████▏                         | 2091/8000 [1:39:22<4:52:12,  2.97s/it]

Episode 2091/8000, real env return = -29.84


 26%|█████████▏                         | 2101/8000 [1:39:54<5:24:44,  3.30s/it]

Episode 2101/8000, real env return = -31.28


 26%|█████████▏                         | 2111/8000 [1:40:26<5:21:17,  3.27s/it]

Episode 2111/8000, real env return = -23.92


 27%|█████████▎                         | 2121/8000 [1:40:59<5:24:25,  3.31s/it]

Episode 2121/8000, real env return = -27.30


 27%|█████████▎                         | 2131/8000 [1:41:32<5:06:06,  3.13s/it]

Episode 2131/8000, real env return = -22.76


 27%|█████████▎                         | 2141/8000 [1:42:05<5:25:08,  3.33s/it]

Episode 2141/8000, real env return = -27.59


 27%|█████████▍                         | 2151/8000 [1:42:37<5:17:47,  3.26s/it]

Episode 2151/8000, real env return = -30.89


 27%|█████████▍                         | 2161/8000 [1:43:08<5:19:46,  3.29s/it]

Episode 2161/8000, real env return = -27.81


 27%|█████████▍                         | 2171/8000 [1:43:40<5:23:26,  3.33s/it]

Episode 2171/8000, real env return = -46.99


 27%|█████████▌                         | 2181/8000 [1:44:12<5:18:49,  3.29s/it]

Episode 2181/8000, real env return = -21.74


 27%|█████████▌                         | 2191/8000 [1:44:44<4:53:28,  3.03s/it]

Episode 2191/8000, real env return = -22.12


 28%|█████████▋                         | 2201/8000 [1:45:17<5:20:53,  3.32s/it]

Episode 2201/8000, real env return = -22.82


 28%|█████████▋                         | 2211/8000 [1:45:50<5:17:34,  3.29s/it]

Episode 2211/8000, real env return = -51.02


 28%|█████████▋                         | 2221/8000 [1:46:21<4:38:38,  2.89s/it]

Episode 2221/8000, real env return = -31.99


 28%|█████████▊                         | 2231/8000 [1:46:54<5:24:42,  3.38s/it]

Episode 2231/8000, real env return = -23.49


 28%|█████████▊                         | 2241/8000 [1:47:27<5:09:04,  3.22s/it]

Episode 2241/8000, real env return = -25.90


 28%|█████████▊                         | 2251/8000 [1:47:59<5:09:14,  3.23s/it]

Episode 2251/8000, real env return = -38.75


 28%|█████████▉                         | 2261/8000 [1:48:30<5:07:12,  3.21s/it]

Episode 2261/8000, real env return = -21.90


 28%|█████████▉                         | 2271/8000 [1:49:03<5:20:19,  3.35s/it]

Episode 2271/8000, real env return = -24.59


 29%|█████████▉                         | 2281/8000 [1:49:36<5:08:35,  3.24s/it]

Episode 2281/8000, real env return = -27.00


 29%|██████████                         | 2291/8000 [1:50:09<5:01:44,  3.17s/it]

Episode 2291/8000, real env return = -25.97


 29%|██████████                         | 2301/8000 [1:50:41<5:01:50,  3.18s/it]

Episode 2301/8000, real env return = -56.17


 29%|██████████                         | 2311/8000 [1:51:14<5:18:47,  3.36s/it]

Episode 2311/8000, real env return = -53.86


 29%|██████████▏                        | 2321/8000 [1:51:47<5:05:08,  3.22s/it]

Episode 2321/8000, real env return = -24.50


 29%|██████████▏                        | 2331/8000 [1:52:19<4:59:59,  3.18s/it]

Episode 2331/8000, real env return = -20.57


 29%|██████████▏                        | 2341/8000 [1:52:51<4:56:22,  3.14s/it]

Episode 2341/8000, real env return = -21.26


 29%|██████████▎                        | 2351/8000 [1:53:23<5:06:29,  3.26s/it]

Episode 2351/8000, real env return = -28.07


 30%|██████████▎                        | 2361/8000 [1:53:54<4:56:24,  3.15s/it]

Episode 2361/8000, real env return = -20.92


 30%|██████████▎                        | 2371/8000 [1:54:27<5:19:16,  3.40s/it]

Episode 2371/8000, real env return = -21.78


 30%|██████████▍                        | 2381/8000 [1:55:00<5:17:19,  3.39s/it]

Episode 2381/8000, real env return = -27.06


 30%|██████████▍                        | 2391/8000 [1:55:33<4:58:59,  3.20s/it]

Episode 2391/8000, real env return = -22.95


 30%|██████████▌                        | 2401/8000 [1:56:06<5:12:51,  3.35s/it]

Episode 2401/8000, real env return = -26.96


 30%|██████████▌                        | 2411/8000 [1:56:40<5:05:00,  3.27s/it]

Episode 2411/8000, real env return = -32.88


 30%|██████████▌                        | 2421/8000 [1:57:13<5:15:45,  3.40s/it]

Episode 2421/8000, real env return = -29.31


 30%|██████████▋                        | 2431/8000 [1:57:44<4:47:44,  3.10s/it]

Episode 2431/8000, real env return = -23.81


 31%|██████████▋                        | 2441/8000 [1:58:15<4:45:10,  3.08s/it]

Episode 2441/8000, real env return = -28.95


 31%|██████████▋                        | 2451/8000 [1:58:47<5:00:35,  3.25s/it]

Episode 2451/8000, real env return = -24.59


 31%|██████████▊                        | 2461/8000 [1:59:19<4:54:07,  3.19s/it]

Episode 2461/8000, real env return = -23.00


 31%|██████████▊                        | 2471/8000 [1:59:51<4:48:55,  3.14s/it]

Episode 2471/8000, real env return = -26.63


 31%|██████████▊                        | 2481/8000 [2:00:23<4:53:13,  3.19s/it]

Episode 2481/8000, real env return = -23.36


 31%|██████████▉                        | 2491/8000 [2:00:56<5:06:06,  3.33s/it]

Episode 2491/8000, real env return = -24.95


 31%|██████████▉                        | 2501/8000 [2:01:28<4:46:35,  3.13s/it]

Episode 2501/8000, real env return = -38.76


 31%|██████████▉                        | 2511/8000 [2:02:00<4:49:05,  3.16s/it]

Episode 2511/8000, real env return = -24.62


 32%|███████████                        | 2521/8000 [2:02:31<4:41:13,  3.08s/it]

Episode 2521/8000, real env return = -24.55


 32%|███████████                        | 2531/8000 [2:03:04<5:00:47,  3.30s/it]

Episode 2531/8000, real env return = -21.44


 32%|███████████                        | 2541/8000 [2:03:35<4:43:43,  3.12s/it]

Episode 2541/8000, real env return = -21.83


 32%|███████████▏                       | 2551/8000 [2:04:07<4:58:22,  3.29s/it]

Episode 2551/8000, real env return = -19.35


 32%|███████████▏                       | 2561/8000 [2:04:39<4:50:43,  3.21s/it]

Episode 2561/8000, real env return = -20.82


 32%|███████████▏                       | 2571/8000 [2:05:12<4:43:50,  3.14s/it]

Episode 2571/8000, real env return = -21.68


 32%|███████████▎                       | 2581/8000 [2:05:46<5:04:44,  3.37s/it]

Episode 2581/8000, real env return = -24.72


 32%|███████████▎                       | 2591/8000 [2:06:17<4:39:28,  3.10s/it]

Episode 2591/8000, real env return = -25.81


 33%|███████████▍                       | 2601/8000 [2:06:48<4:49:19,  3.22s/it]

Episode 2601/8000, real env return = -23.30


 33%|███████████▍                       | 2611/8000 [2:07:21<4:54:26,  3.28s/it]

Episode 2611/8000, real env return = -22.52


 33%|███████████▍                       | 2621/8000 [2:07:52<4:54:55,  3.29s/it]

Episode 2621/8000, real env return = -25.15


 33%|███████████▌                       | 2631/8000 [2:08:24<4:46:22,  3.20s/it]

Episode 2631/8000, real env return = -18.83


 33%|███████████▌                       | 2641/8000 [2:08:56<4:35:52,  3.09s/it]

Episode 2641/8000, real env return = -21.48


 33%|███████████▌                       | 2651/8000 [2:09:26<4:40:03,  3.14s/it]

Episode 2651/8000, real env return = -24.49


 33%|███████████▋                       | 2661/8000 [2:09:58<4:51:28,  3.28s/it]

Episode 2661/8000, real env return = -20.40


 33%|███████████▋                       | 2671/8000 [2:10:29<4:33:26,  3.08s/it]

Episode 2671/8000, real env return = -22.16


 34%|███████████▋                       | 2681/8000 [2:11:00<4:25:58,  3.00s/it]

Episode 2681/8000, real env return = -21.45


 34%|███████████▊                       | 2691/8000 [2:11:32<4:46:32,  3.24s/it]

Episode 2691/8000, real env return = -21.23


 34%|███████████▊                       | 2701/8000 [2:12:05<4:50:52,  3.29s/it]

Episode 2701/8000, real env return = -28.11


 34%|███████████▊                       | 2711/8000 [2:12:38<4:54:31,  3.34s/it]

Episode 2711/8000, real env return = -19.97


 34%|███████████▉                       | 2721/8000 [2:13:11<4:53:51,  3.34s/it]

Episode 2721/8000, real env return = -23.15


 34%|███████████▉                       | 2731/8000 [2:13:43<4:22:28,  2.99s/it]

Episode 2731/8000, real env return = -24.32


 34%|███████████▉                       | 2741/8000 [2:14:16<4:55:02,  3.37s/it]

Episode 2741/8000, real env return = -17.66


 34%|████████████                       | 2751/8000 [2:14:48<4:38:35,  3.18s/it]

Episode 2751/8000, real env return = -23.04


 35%|████████████                       | 2761/8000 [2:15:20<4:34:01,  3.14s/it]

Episode 2761/8000, real env return = -18.89


 35%|████████████                       | 2771/8000 [2:15:53<4:46:32,  3.29s/it]

Episode 2771/8000, real env return = -41.32


 35%|████████████▏                      | 2781/8000 [2:16:27<4:50:01,  3.33s/it]

Episode 2781/8000, real env return = -23.62


 35%|████████████▏                      | 2791/8000 [2:16:58<4:24:51,  3.05s/it]

Episode 2791/8000, real env return = -33.61


 35%|████████████▎                      | 2801/8000 [2:17:29<4:30:32,  3.12s/it]

Episode 2801/8000, real env return = -21.51


 35%|████████████▎                      | 2811/8000 [2:18:02<4:42:50,  3.27s/it]

Episode 2811/8000, real env return = -18.92


 35%|████████████▎                      | 2821/8000 [2:18:34<4:44:41,  3.30s/it]

Episode 2821/8000, real env return = -18.57


 35%|████████████▍                      | 2831/8000 [2:19:06<4:35:13,  3.19s/it]

Episode 2831/8000, real env return = -20.16


 36%|████████████▍                      | 2841/8000 [2:19:38<4:41:22,  3.27s/it]

Episode 2841/8000, real env return = -37.92


 36%|████████████▍                      | 2851/8000 [2:20:12<4:52:04,  3.40s/it]

Episode 2851/8000, real env return = -18.70


 36%|████████████▌                      | 2861/8000 [2:20:45<4:43:33,  3.31s/it]

Episode 2861/8000, real env return = -18.91


 36%|████████████▌                      | 2871/8000 [2:21:17<4:30:03,  3.16s/it]

Episode 2871/8000, real env return = -18.99


 36%|████████████▌                      | 2881/8000 [2:21:48<4:36:36,  3.24s/it]

Episode 2881/8000, real env return = -23.38


 36%|████████████▋                      | 2891/8000 [2:22:21<4:36:33,  3.25s/it]

Episode 2891/8000, real env return = -26.30


 36%|████████████▋                      | 2901/8000 [2:22:52<4:34:15,  3.23s/it]

Episode 2901/8000, real env return = -20.54


 36%|████████████▋                      | 2911/8000 [2:23:24<4:31:44,  3.20s/it]

Episode 2911/8000, real env return = -19.34


 37%|████████████▊                      | 2921/8000 [2:23:56<4:27:12,  3.16s/it]

Episode 2921/8000, real env return = -22.36


 37%|████████████▊                      | 2931/8000 [2:24:28<4:29:42,  3.19s/it]

Episode 2931/8000, real env return = -20.72


 37%|████████████▊                      | 2941/8000 [2:25:01<4:40:38,  3.33s/it]

Episode 2941/8000, real env return = -19.55


 37%|████████████▉                      | 2951/8000 [2:25:34<4:24:08,  3.14s/it]

Episode 2951/8000, real env return = -22.08


 37%|████████████▉                      | 2961/8000 [2:26:06<4:32:10,  3.24s/it]

Episode 2961/8000, real env return = -18.53


 37%|████████████▉                      | 2971/8000 [2:26:38<4:24:51,  3.16s/it]

Episode 2971/8000, real env return = -21.73


 37%|█████████████                      | 2981/8000 [2:27:11<4:29:18,  3.22s/it]

Episode 2981/8000, real env return = -20.80


 37%|█████████████                      | 2991/8000 [2:27:42<4:22:26,  3.14s/it]

Episode 2991/8000, real env return = -27.93


 38%|█████████████▏                     | 3001/8000 [2:28:13<4:14:09,  3.05s/it]

Episode 3001/8000, real env return = -21.31


 38%|█████████████▏                     | 3011/8000 [2:28:46<4:32:36,  3.28s/it]

Episode 3011/8000, real env return = -22.11


 38%|█████████████▏                     | 3021/8000 [2:29:18<4:21:36,  3.15s/it]

Episode 3021/8000, real env return = -22.22


 38%|█████████████▎                     | 3031/8000 [2:29:49<4:26:16,  3.22s/it]

Episode 3031/8000, real env return = -23.53


 38%|█████████████▎                     | 3041/8000 [2:30:21<4:27:40,  3.24s/it]

Episode 3041/8000, real env return = -26.99


 38%|█████████████▎                     | 3051/8000 [2:30:53<4:33:02,  3.31s/it]

Episode 3051/8000, real env return = -27.34


 38%|█████████████▍                     | 3061/8000 [2:31:24<4:27:06,  3.24s/it]

Episode 3061/8000, real env return = -26.81


 38%|█████████████▍                     | 3071/8000 [2:31:56<4:26:00,  3.24s/it]

Episode 3071/8000, real env return = -22.04


 39%|█████████████▍                     | 3081/8000 [2:32:29<4:28:55,  3.28s/it]

Episode 3081/8000, real env return = -23.37


 39%|█████████████▌                     | 3091/8000 [2:33:01<4:22:21,  3.21s/it]

Episode 3091/8000, real env return = -25.73


 39%|█████████████▌                     | 3101/8000 [2:33:35<4:33:45,  3.35s/it]

Episode 3101/8000, real env return = -21.49


 39%|█████████████▌                     | 3111/8000 [2:34:06<4:24:24,  3.24s/it]

Episode 3111/8000, real env return = -20.14


 39%|█████████████▋                     | 3121/8000 [2:34:38<4:22:19,  3.23s/it]

Episode 3121/8000, real env return = -23.29


 39%|█████████████▋                     | 3131/8000 [2:35:11<4:29:33,  3.32s/it]

Episode 3131/8000, real env return = -22.45


 39%|█████████████▋                     | 3141/8000 [2:35:44<4:26:51,  3.30s/it]

Episode 3141/8000, real env return = -23.75


 39%|█████████████▊                     | 3151/8000 [2:36:17<4:31:55,  3.36s/it]

Episode 3151/8000, real env return = -20.01


 40%|█████████████▊                     | 3161/8000 [2:36:50<4:26:55,  3.31s/it]

Episode 3161/8000, real env return = -16.38


 40%|█████████████▊                     | 3171/8000 [2:37:22<4:20:36,  3.24s/it]

Episode 3171/8000, real env return = -22.61


 40%|█████████████▉                     | 3181/8000 [2:37:54<4:24:36,  3.29s/it]

Episode 3181/8000, real env return = -20.90


 40%|█████████████▉                     | 3191/8000 [2:38:28<4:31:06,  3.38s/it]

Episode 3191/8000, real env return = -16.86


 40%|██████████████                     | 3201/8000 [2:39:01<4:16:24,  3.21s/it]

Episode 3201/8000, real env return = -18.48


 40%|██████████████                     | 3211/8000 [2:39:33<4:06:07,  3.08s/it]

Episode 3211/8000, real env return = -20.20


 40%|██████████████                     | 3221/8000 [2:40:07<4:29:11,  3.38s/it]

Episode 3221/8000, real env return = -18.09


 40%|██████████████▏                    | 3231/8000 [2:40:40<4:24:06,  3.32s/it]

Episode 3231/8000, real env return = -20.19


 41%|██████████████▏                    | 3241/8000 [2:41:12<4:27:11,  3.37s/it]

Episode 3241/8000, real env return = -18.66


 41%|██████████████▏                    | 3251/8000 [2:41:45<4:24:26,  3.34s/it]

Episode 3251/8000, real env return = -21.52


 41%|██████████████▎                    | 3261/8000 [2:42:18<4:23:39,  3.34s/it]

Episode 3261/8000, real env return = -22.32


 41%|██████████████▎                    | 3271/8000 [2:42:51<4:15:36,  3.24s/it]

Episode 3271/8000, real env return = -18.88


 41%|██████████████▎                    | 3281/8000 [2:43:24<4:20:15,  3.31s/it]

Episode 3281/8000, real env return = -17.77


 41%|██████████████▍                    | 3291/8000 [2:43:57<4:20:37,  3.32s/it]

Episode 3291/8000, real env return = -21.03


 41%|██████████████▍                    | 3301/8000 [2:44:29<4:15:17,  3.26s/it]

Episode 3301/8000, real env return = -27.34


 41%|██████████████▍                    | 3311/8000 [2:45:00<3:55:56,  3.02s/it]

Episode 3311/8000, real env return = -23.43


 42%|██████████████▌                    | 3321/8000 [2:45:33<4:08:12,  3.18s/it]

Episode 3321/8000, real env return = -34.51


 42%|██████████████▌                    | 3331/8000 [2:46:05<4:13:17,  3.26s/it]

Episode 3331/8000, real env return = -19.35


 42%|██████████████▌                    | 3341/8000 [2:46:38<4:19:15,  3.34s/it]

Episode 3341/8000, real env return = -20.13


 42%|██████████████▋                    | 3351/8000 [2:47:12<4:23:50,  3.41s/it]

Episode 3351/8000, real env return = -22.07


 42%|██████████████▋                    | 3361/8000 [2:47:44<4:07:58,  3.21s/it]

Episode 3361/8000, real env return = -22.46


 42%|██████████████▋                    | 3371/8000 [2:48:14<3:57:12,  3.07s/it]

Episode 3371/8000, real env return = -18.61


 42%|██████████████▊                    | 3381/8000 [2:48:48<4:19:23,  3.37s/it]

Episode 3381/8000, real env return = -27.39


 42%|██████████████▊                    | 3391/8000 [2:49:21<4:15:43,  3.33s/it]

Episode 3391/8000, real env return = -25.87


 43%|██████████████▉                    | 3401/8000 [2:49:52<4:09:03,  3.25s/it]

Episode 3401/8000, real env return = -19.57


 43%|██████████████▉                    | 3411/8000 [2:50:26<4:20:03,  3.40s/it]

Episode 3411/8000, real env return = -20.12


 43%|██████████████▉                    | 3421/8000 [2:51:00<4:17:10,  3.37s/it]

Episode 3421/8000, real env return = -20.73


 43%|███████████████                    | 3431/8000 [2:51:32<3:54:21,  3.08s/it]

Episode 3431/8000, real env return = -21.82


 43%|███████████████                    | 3441/8000 [2:52:05<4:08:09,  3.27s/it]

Episode 3441/8000, real env return = -17.82


 43%|███████████████                    | 3451/8000 [2:52:37<3:59:04,  3.15s/it]

Episode 3451/8000, real env return = -21.20


 43%|███████████████▏                   | 3461/8000 [2:53:10<4:01:48,  3.20s/it]

Episode 3461/8000, real env return = -27.03


 43%|███████████████▏                   | 3471/8000 [2:53:43<4:03:17,  3.22s/it]

Episode 3471/8000, real env return = -20.77


 44%|███████████████▏                   | 3481/8000 [2:54:15<4:06:43,  3.28s/it]

Episode 3481/8000, real env return = -25.44


 44%|███████████████▎                   | 3491/8000 [2:54:49<4:10:22,  3.33s/it]

Episode 3491/8000, real env return = -28.18


 44%|███████████████▎                   | 3501/8000 [2:55:22<4:13:03,  3.37s/it]

Episode 3501/8000, real env return = -23.10


 44%|███████████████▎                   | 3511/8000 [2:55:56<4:10:09,  3.34s/it]

Episode 3511/8000, real env return = -22.61


 44%|███████████████▍                   | 3521/8000 [2:56:28<4:04:05,  3.27s/it]

Episode 3521/8000, real env return = -24.06


 44%|███████████████▍                   | 3531/8000 [2:57:00<4:05:57,  3.30s/it]

Episode 3531/8000, real env return = -24.17


 44%|███████████████▍                   | 3541/8000 [2:57:32<3:48:51,  3.08s/it]

Episode 3541/8000, real env return = -23.25


 44%|███████████████▌                   | 3551/8000 [2:58:04<3:58:52,  3.22s/it]

Episode 3551/8000, real env return = -18.13


 45%|███████████████▌                   | 3561/8000 [2:58:37<3:55:37,  3.18s/it]

Episode 3561/8000, real env return = -90.08


 45%|███████████████▌                   | 3571/8000 [2:59:10<4:03:55,  3.30s/it]

Episode 3571/8000, real env return = -18.19


 45%|███████████████▋                   | 3581/8000 [2:59:44<4:10:57,  3.41s/it]

Episode 3581/8000, real env return = -23.91


 45%|███████████████▋                   | 3591/8000 [3:00:15<3:53:11,  3.17s/it]

Episode 3591/8000, real env return = -18.22


 45%|███████████████▊                   | 3601/8000 [3:00:48<4:02:17,  3.30s/it]

Episode 3601/8000, real env return = -18.72


 45%|███████████████▊                   | 3611/8000 [3:01:21<4:08:43,  3.40s/it]

Episode 3611/8000, real env return = -18.30


 45%|███████████████▊                   | 3621/8000 [3:01:54<4:01:32,  3.31s/it]

Episode 3621/8000, real env return = -15.92


 45%|███████████████▉                   | 3631/8000 [3:02:25<3:57:09,  3.26s/it]

Episode 3631/8000, real env return = -18.20


 46%|███████████████▉                   | 3641/8000 [3:02:59<4:04:24,  3.36s/it]

Episode 3641/8000, real env return = -22.67


 46%|███████████████▉                   | 3651/8000 [3:03:32<4:04:01,  3.37s/it]

Episode 3651/8000, real env return = -19.73


 46%|████████████████                   | 3661/8000 [3:04:04<3:53:19,  3.23s/it]

Episode 3661/8000, real env return = -18.62


 46%|████████████████                   | 3671/8000 [3:04:38<4:04:02,  3.38s/it]

Episode 3671/8000, real env return = -33.26


 46%|████████████████                   | 3681/8000 [3:05:11<3:58:13,  3.31s/it]

Episode 3681/8000, real env return = -25.91


 46%|████████████████▏                  | 3691/8000 [3:05:43<3:52:48,  3.24s/it]

Episode 3691/8000, real env return = -19.37


 46%|████████████████▏                  | 3701/8000 [3:06:17<4:02:33,  3.39s/it]

Episode 3701/8000, real env return = -45.59


 46%|████████████████▏                  | 3711/8000 [3:06:51<3:52:49,  3.26s/it]

Episode 3711/8000, real env return = -20.14


 47%|████████████████▎                  | 3721/8000 [3:07:24<3:54:59,  3.29s/it]

Episode 3721/8000, real env return = -21.58


 47%|████████████████▎                  | 3731/8000 [3:07:57<3:52:47,  3.27s/it]

Episode 3731/8000, real env return = -18.92


 47%|████████████████▎                  | 3741/8000 [3:08:29<3:45:50,  3.18s/it]

Episode 3741/8000, real env return = -14.79


 47%|████████████████▍                  | 3751/8000 [3:09:02<3:54:03,  3.31s/it]

Episode 3751/8000, real env return = -16.61


 47%|████████████████▍                  | 3761/8000 [3:09:35<3:52:59,  3.30s/it]

Episode 3761/8000, real env return = -21.02


 47%|████████████████▍                  | 3771/8000 [3:10:07<3:43:23,  3.17s/it]

Episode 3771/8000, real env return = -18.72


 47%|████████████████▌                  | 3781/8000 [3:10:40<3:55:56,  3.36s/it]

Episode 3781/8000, real env return = -15.61


 47%|████████████████▌                  | 3791/8000 [3:11:14<3:54:22,  3.34s/it]

Episode 3791/8000, real env return = -18.83


 48%|████████████████▋                  | 3801/8000 [3:11:47<3:50:42,  3.30s/it]

Episode 3801/8000, real env return = -20.71


 48%|████████████████▋                  | 3811/8000 [3:12:19<3:44:15,  3.21s/it]

Episode 3811/8000, real env return = -18.39


 48%|████████████████▋                  | 3821/8000 [3:12:51<3:48:06,  3.28s/it]

Episode 3821/8000, real env return = -20.81


 48%|████████████████▊                  | 3831/8000 [3:13:25<3:50:45,  3.32s/it]

Episode 3831/8000, real env return = -19.77


 48%|████████████████▊                  | 3841/8000 [3:13:56<3:45:49,  3.26s/it]

Episode 3841/8000, real env return = -18.51


 48%|████████████████▊                  | 3851/8000 [3:14:29<3:45:02,  3.25s/it]

Episode 3851/8000, real env return = -22.18


 48%|████████████████▉                  | 3861/8000 [3:15:02<3:49:10,  3.32s/it]

Episode 3861/8000, real env return = -21.14


 48%|████████████████▉                  | 3871/8000 [3:15:36<3:50:00,  3.34s/it]

Episode 3871/8000, real env return = -25.11


 49%|████████████████▉                  | 3881/8000 [3:16:07<3:31:47,  3.09s/it]

Episode 3881/8000, real env return = -16.82


 49%|█████████████████                  | 3891/8000 [3:16:39<3:36:57,  3.17s/it]

Episode 3891/8000, real env return = -18.73


 49%|█████████████████                  | 3901/8000 [3:17:11<3:42:42,  3.26s/it]

Episode 3901/8000, real env return = -18.62


 49%|█████████████████                  | 3911/8000 [3:17:44<3:43:45,  3.28s/it]

Episode 3911/8000, real env return = -24.33


 49%|█████████████████▏                 | 3921/8000 [3:18:18<3:46:08,  3.33s/it]

Episode 3921/8000, real env return = -21.51


 49%|█████████████████▏                 | 3931/8000 [3:18:49<3:32:47,  3.14s/it]

Episode 3931/8000, real env return = -28.21


 49%|█████████████████▏                 | 3941/8000 [3:19:23<3:43:33,  3.30s/it]

Episode 3941/8000, real env return = -23.61


 49%|█████████████████▎                 | 3951/8000 [3:19:55<3:38:20,  3.24s/it]

Episode 3951/8000, real env return = -33.64


 50%|█████████████████▎                 | 3961/8000 [3:20:28<3:43:03,  3.31s/it]

Episode 3961/8000, real env return = -30.60


 50%|█████████████████▎                 | 3971/8000 [3:21:01<3:42:04,  3.31s/it]

Episode 3971/8000, real env return = -22.32


 50%|█████████████████▍                 | 3981/8000 [3:21:33<3:35:16,  3.21s/it]

Episode 3981/8000, real env return = -18.61


 50%|█████████████████▍                 | 3991/8000 [3:22:06<3:39:19,  3.28s/it]

Episode 3991/8000, real env return = -22.04


 50%|█████████████████▌                 | 4001/8000 [3:22:38<3:36:13,  3.24s/it]

Episode 4001/8000, real env return = -21.24


 50%|█████████████████▌                 | 4011/8000 [3:23:11<3:38:07,  3.28s/it]

Episode 4011/8000, real env return = -25.24


 50%|█████████████████▌                 | 4021/8000 [3:23:42<3:23:11,  3.06s/it]

Episode 4021/8000, real env return = -27.24


 50%|█████████████████▋                 | 4031/8000 [3:24:12<3:15:37,  2.96s/it]

Episode 4031/8000, real env return = -19.67


 51%|█████████████████▋                 | 4041/8000 [3:24:45<3:34:42,  3.25s/it]

Episode 4041/8000, real env return = -15.95


 51%|█████████████████▋                 | 4051/8000 [3:25:17<3:38:11,  3.32s/it]

Episode 4051/8000, real env return = -18.34


 51%|█████████████████▊                 | 4061/8000 [3:25:49<3:31:27,  3.22s/it]

Episode 4061/8000, real env return = -19.47


 51%|█████████████████▊                 | 4071/8000 [3:26:21<3:29:59,  3.21s/it]

Episode 4071/8000, real env return = -18.14


 51%|█████████████████▊                 | 4081/8000 [3:26:54<3:38:05,  3.34s/it]

Episode 4081/8000, real env return = -30.81


 51%|█████████████████▉                 | 4091/8000 [3:27:27<3:36:35,  3.32s/it]

Episode 4091/8000, real env return = -23.33


 51%|█████████████████▉                 | 4101/8000 [3:27:59<3:29:45,  3.23s/it]

Episode 4101/8000, real env return = -17.81


 51%|█████████████████▉                 | 4111/8000 [3:28:29<2:59:16,  2.77s/it]

Episode 4111/8000, real env return = -19.19


 52%|██████████████████                 | 4121/8000 [3:29:01<3:16:23,  3.04s/it]

Episode 4121/8000, real env return = -28.79


 52%|██████████████████                 | 4131/8000 [3:29:34<3:30:36,  3.27s/it]

Episode 4131/8000, real env return = -17.37


 52%|██████████████████                 | 4141/8000 [3:30:06<3:26:31,  3.21s/it]

Episode 4141/8000, real env return = -26.27


 52%|██████████████████▏                | 4151/8000 [3:30:37<3:21:03,  3.13s/it]

Episode 4151/8000, real env return = -30.86


 52%|██████████████████▏                | 4161/8000 [3:31:09<3:28:43,  3.26s/it]

Episode 4161/8000, real env return = -22.44


 52%|██████████████████▏                | 4171/8000 [3:31:42<3:28:08,  3.26s/it]

Episode 4171/8000, real env return = -17.19


 52%|██████████████████▎                | 4181/8000 [3:32:14<3:23:19,  3.19s/it]

Episode 4181/8000, real env return = -16.43


 52%|██████████████████▎                | 4191/8000 [3:32:46<3:29:41,  3.30s/it]

Episode 4191/8000, real env return = -25.64


 53%|██████████████████▍                | 4201/8000 [3:33:19<3:33:06,  3.37s/it]

Episode 4201/8000, real env return = -31.90


 53%|██████████████████▍                | 4211/8000 [3:33:51<3:25:18,  3.25s/it]

Episode 4211/8000, real env return = -22.17


 53%|██████████████████▍                | 4221/8000 [3:34:24<3:28:46,  3.31s/it]

Episode 4221/8000, real env return = -18.90


 53%|██████████████████▌                | 4231/8000 [3:34:54<3:03:40,  2.92s/it]

Episode 4231/8000, real env return = -121.02


 53%|██████████████████▌                | 4241/8000 [3:35:27<3:24:47,  3.27s/it]

Episode 4241/8000, real env return = -21.85


 53%|██████████████████▌                | 4251/8000 [3:36:01<3:26:48,  3.31s/it]

Episode 4251/8000, real env return = -21.79


 53%|██████████████████▋                | 4261/8000 [3:36:33<3:15:57,  3.14s/it]

Episode 4261/8000, real env return = -23.67


 53%|██████████████████▋                | 4271/8000 [3:37:05<3:11:05,  3.07s/it]

Episode 4271/8000, real env return = -25.04


 54%|██████████████████▋                | 4281/8000 [3:37:38<3:25:38,  3.32s/it]

Episode 4281/8000, real env return = -25.21


 54%|██████████████████▊                | 4291/8000 [3:38:09<3:09:49,  3.07s/it]

Episode 4291/8000, real env return = -35.10


 54%|██████████████████▊                | 4301/8000 [3:38:42<3:22:29,  3.28s/it]

Episode 4301/8000, real env return = -25.69


 54%|██████████████████▊                | 4311/8000 [3:39:15<3:24:30,  3.33s/it]

Episode 4311/8000, real env return = -20.79


 54%|██████████████████▉                | 4321/8000 [3:39:48<3:20:54,  3.28s/it]

Episode 4321/8000, real env return = -17.28


 54%|██████████████████▉                | 4331/8000 [3:40:21<3:19:25,  3.26s/it]

Episode 4331/8000, real env return = -18.64


 54%|██████████████████▉                | 4341/8000 [3:40:52<3:05:33,  3.04s/it]

Episode 4341/8000, real env return = -18.25


 54%|███████████████████                | 4351/8000 [3:41:23<3:13:13,  3.18s/it]

Episode 4351/8000, real env return = -22.01


 55%|███████████████████                | 4361/8000 [3:41:58<3:29:19,  3.45s/it]

Episode 4361/8000, real env return = -26.45


 55%|███████████████████                | 4371/8000 [3:42:31<3:21:44,  3.34s/it]

Episode 4371/8000, real env return = -31.68


 55%|███████████████████▏               | 4381/8000 [3:43:03<3:21:54,  3.35s/it]

Episode 4381/8000, real env return = -23.06


 55%|███████████████████▏               | 4391/8000 [3:43:35<3:14:58,  3.24s/it]

Episode 4391/8000, real env return = -23.67


 55%|███████████████████▎               | 4401/8000 [3:44:07<3:11:30,  3.19s/it]

Episode 4401/8000, real env return = -18.92


 55%|███████████████████▎               | 4411/8000 [3:44:40<3:16:31,  3.29s/it]

Episode 4411/8000, real env return = -32.98


 55%|███████████████████▎               | 4421/8000 [3:45:12<3:16:48,  3.30s/it]

Episode 4421/8000, real env return = -26.80


 55%|███████████████████▍               | 4431/8000 [3:45:45<3:02:56,  3.08s/it]

Episode 4431/8000, real env return = -17.19


 56%|███████████████████▍               | 4441/8000 [3:46:17<3:17:22,  3.33s/it]

Episode 4441/8000, real env return = -18.66


 56%|███████████████████▍               | 4451/8000 [3:46:50<3:14:18,  3.28s/it]

Episode 4451/8000, real env return = -28.45


 56%|███████████████████▌               | 4461/8000 [3:47:23<3:14:41,  3.30s/it]

Episode 4461/8000, real env return = -25.30


 56%|███████████████████▌               | 4471/8000 [3:47:55<3:02:47,  3.11s/it]

Episode 4471/8000, real env return = -23.49


 56%|███████████████████▌               | 4481/8000 [3:48:28<3:07:27,  3.20s/it]

Episode 4481/8000, real env return = -18.90


 56%|███████████████████▋               | 4491/8000 [3:49:01<3:16:19,  3.36s/it]

Episode 4491/8000, real env return = -21.48


 56%|███████████████████▋               | 4501/8000 [3:49:32<3:00:22,  3.09s/it]

Episode 4501/8000, real env return = -24.07


 56%|███████████████████▋               | 4511/8000 [3:50:05<3:13:53,  3.33s/it]

Episode 4511/8000, real env return = -30.33


 57%|███████████████████▊               | 4521/8000 [3:50:38<3:03:34,  3.17s/it]

Episode 4521/8000, real env return = -18.67


 57%|███████████████████▊               | 4531/8000 [3:51:11<3:11:52,  3.32s/it]

Episode 4531/8000, real env return = -16.88


 57%|███████████████████▊               | 4541/8000 [3:51:43<3:09:09,  3.28s/it]

Episode 4541/8000, real env return = -18.40


 57%|███████████████████▉               | 4551/8000 [3:52:16<2:59:46,  3.13s/it]

Episode 4551/8000, real env return = -21.75


 57%|███████████████████▉               | 4561/8000 [3:52:49<3:06:46,  3.26s/it]

Episode 4561/8000, real env return = -17.22


 57%|███████████████████▉               | 4571/8000 [3:53:21<3:11:04,  3.34s/it]

Episode 4571/8000, real env return = -20.58


 57%|████████████████████               | 4581/8000 [3:53:54<3:07:41,  3.29s/it]

Episode 4581/8000, real env return = -18.71


 57%|████████████████████               | 4591/8000 [3:54:27<3:07:57,  3.31s/it]

Episode 4591/8000, real env return = -19.63


 58%|████████████████████▏              | 4601/8000 [3:54:59<3:00:42,  3.19s/it]

Episode 4601/8000, real env return = -18.37


 58%|████████████████████▏              | 4611/8000 [3:55:31<3:04:32,  3.27s/it]

Episode 4611/8000, real env return = -22.81


 58%|████████████████████▏              | 4621/8000 [3:56:04<3:05:32,  3.29s/it]

Episode 4621/8000, real env return = -20.17


 58%|████████████████████▎              | 4631/8000 [3:56:37<3:07:58,  3.35s/it]

Episode 4631/8000, real env return = -20.13


 58%|████████████████████▎              | 4641/8000 [3:57:09<3:02:42,  3.26s/it]

Episode 4641/8000, real env return = -18.81


 58%|████████████████████▎              | 4651/8000 [3:57:42<3:08:07,  3.37s/it]

Episode 4651/8000, real env return = -16.61


 58%|████████████████████▍              | 4661/8000 [3:58:17<3:12:58,  3.47s/it]

Episode 4661/8000, real env return = -20.75


 58%|████████████████████▍              | 4671/8000 [3:58:49<2:57:02,  3.19s/it]

Episode 4671/8000, real env return = -20.44


 59%|████████████████████▍              | 4681/8000 [3:59:21<3:01:37,  3.28s/it]

Episode 4681/8000, real env return = -21.55


 59%|████████████████████▌              | 4691/8000 [3:59:53<2:56:42,  3.20s/it]

Episode 4691/8000, real env return = -18.57


 59%|████████████████████▌              | 4701/8000 [4:00:25<2:54:56,  3.18s/it]

Episode 4701/8000, real env return = -19.13


 59%|████████████████████▌              | 4711/8000 [4:00:58<2:54:44,  3.19s/it]

Episode 4711/8000, real env return = -23.19


 59%|████████████████████▋              | 4721/8000 [4:01:31<2:54:30,  3.19s/it]

Episode 4721/8000, real env return = -18.42


 59%|████████████████████▋              | 4731/8000 [4:02:02<2:50:10,  3.12s/it]

Episode 4731/8000, real env return = -16.84


 59%|████████████████████▋              | 4741/8000 [4:02:34<2:59:03,  3.30s/it]

Episode 4741/8000, real env return = -17.07


 59%|████████████████████▊              | 4751/8000 [4:03:06<2:49:17,  3.13s/it]

Episode 4751/8000, real env return = -27.48


 60%|████████████████████▊              | 4761/8000 [4:03:37<2:51:40,  3.18s/it]

Episode 4761/8000, real env return = -17.40


 60%|████████████████████▊              | 4771/8000 [4:04:08<2:50:11,  3.16s/it]

Episode 4771/8000, real env return = -16.02


 60%|████████████████████▉              | 4781/8000 [4:04:41<2:55:52,  3.28s/it]

Episode 4781/8000, real env return = -13.24


 60%|████████████████████▉              | 4791/8000 [4:05:14<2:55:37,  3.28s/it]

Episode 4791/8000, real env return = -17.95


 60%|█████████████████████              | 4801/8000 [4:05:46<2:55:05,  3.28s/it]

Episode 4801/8000, real env return = -21.40


 60%|█████████████████████              | 4811/8000 [4:06:18<2:54:33,  3.28s/it]

Episode 4811/8000, real env return = -16.08


 60%|█████████████████████              | 4821/8000 [4:06:50<2:54:06,  3.29s/it]

Episode 4821/8000, real env return = -16.72


 60%|█████████████████████▏             | 4831/8000 [4:07:23<2:52:14,  3.26s/it]

Episode 4831/8000, real env return = -21.55


 61%|█████████████████████▏             | 4841/8000 [4:07:56<2:51:56,  3.27s/it]

Episode 4841/8000, real env return = -24.69


 61%|█████████████████████▏             | 4851/8000 [4:08:28<2:52:13,  3.28s/it]

Episode 4851/8000, real env return = -25.90


 61%|█████████████████████▎             | 4861/8000 [4:09:00<2:42:20,  3.10s/it]

Episode 4861/8000, real env return = -26.13


 61%|█████████████████████▎             | 4871/8000 [4:09:33<2:44:49,  3.16s/it]

Episode 4871/8000, real env return = -15.87


 61%|█████████████████████▎             | 4881/8000 [4:10:05<2:42:22,  3.12s/it]

Episode 4881/8000, real env return = -15.56


 61%|█████████████████████▍             | 4891/8000 [4:10:36<2:31:48,  2.93s/it]

Episode 4891/8000, real env return = -35.51


 61%|█████████████████████▍             | 4901/8000 [4:11:09<2:48:46,  3.27s/it]

Episode 4901/8000, real env return = -17.14


 61%|█████████████████████▍             | 4911/8000 [4:11:42<2:47:28,  3.25s/it]

Episode 4911/8000, real env return = -17.18


 62%|█████████████████████▌             | 4921/8000 [4:12:14<2:50:15,  3.32s/it]

Episode 4921/8000, real env return = -24.75


 62%|█████████████████████▌             | 4931/8000 [4:12:46<2:47:25,  3.27s/it]

Episode 4931/8000, real env return = -17.80


 62%|█████████████████████▌             | 4941/8000 [4:13:19<2:44:49,  3.23s/it]

Episode 4941/8000, real env return = -14.89


 62%|█████████████████████▋             | 4951/8000 [4:13:51<2:42:29,  3.20s/it]

Episode 4951/8000, real env return = -26.78


 62%|█████████████████████▋             | 4961/8000 [4:14:24<2:46:12,  3.28s/it]

Episode 4961/8000, real env return = -16.22


 62%|█████████████████████▋             | 4971/8000 [4:14:56<2:39:30,  3.16s/it]

Episode 4971/8000, real env return = -9.97


 62%|█████████████████████▊             | 4981/8000 [4:15:29<2:45:37,  3.29s/it]

Episode 4981/8000, real env return = -11.52


 62%|█████████████████████▊             | 4991/8000 [4:16:01<2:42:39,  3.24s/it]

Episode 4991/8000, real env return = -25.49


 63%|█████████████████████▉             | 5001/8000 [4:16:33<2:42:55,  3.26s/it]

Episode 5001/8000, real env return = -12.29


 63%|█████████████████████▉             | 5011/8000 [4:17:06<2:44:04,  3.29s/it]

Episode 5011/8000, real env return = -19.28


 63%|█████████████████████▉             | 5021/8000 [4:17:38<2:35:03,  3.12s/it]

Episode 5021/8000, real env return = -16.71


 63%|██████████████████████             | 5031/8000 [4:18:11<2:39:54,  3.23s/it]

Episode 5031/8000, real env return = -18.96


 63%|██████████████████████             | 5041/8000 [4:18:43<2:39:15,  3.23s/it]

Episode 5041/8000, real env return = -23.72


 63%|██████████████████████             | 5051/8000 [4:19:15<2:39:37,  3.25s/it]

Episode 5051/8000, real env return = -14.81


 63%|██████████████████████▏            | 5061/8000 [4:19:48<2:39:49,  3.26s/it]

Episode 5061/8000, real env return = -13.96


 63%|██████████████████████▏            | 5071/8000 [4:20:21<2:37:07,  3.22s/it]

Episode 5071/8000, real env return = -13.19


 64%|██████████████████████▏            | 5081/8000 [4:20:53<2:41:08,  3.31s/it]

Episode 5081/8000, real env return = -12.28


 64%|██████████████████████▎            | 5091/8000 [4:21:24<2:27:41,  3.05s/it]

Episode 5091/8000, real env return = -18.91


 64%|██████████████████████▎            | 5101/8000 [4:21:56<2:40:14,  3.32s/it]

Episode 5101/8000, real env return = -24.76


 64%|██████████████████████▎            | 5111/8000 [4:22:29<2:36:47,  3.26s/it]

Episode 5111/8000, real env return = -22.55


 64%|██████████████████████▍            | 5121/8000 [4:23:01<2:36:14,  3.26s/it]

Episode 5121/8000, real env return = -18.51


 64%|██████████████████████▍            | 5131/8000 [4:23:34<2:38:26,  3.31s/it]

Episode 5131/8000, real env return = -27.86


 64%|██████████████████████▍            | 5141/8000 [4:24:06<2:35:16,  3.26s/it]

Episode 5141/8000, real env return = -17.84


 64%|██████████████████████▌            | 5151/8000 [4:24:39<2:33:55,  3.24s/it]

Episode 5151/8000, real env return = -17.65


 65%|██████████████████████▌            | 5161/8000 [4:25:11<2:30:42,  3.19s/it]

Episode 5161/8000, real env return = -17.78


 65%|██████████████████████▌            | 5171/8000 [4:25:43<2:31:06,  3.20s/it]

Episode 5171/8000, real env return = -21.49


 65%|██████████████████████▋            | 5181/8000 [4:26:15<2:32:36,  3.25s/it]

Episode 5181/8000, real env return = -14.55


 65%|██████████████████████▋            | 5191/8000 [4:26:48<2:34:13,  3.29s/it]

Episode 5191/8000, real env return = -21.66


 65%|██████████████████████▊            | 5201/8000 [4:27:20<2:31:37,  3.25s/it]

Episode 5201/8000, real env return = -17.97


 65%|██████████████████████▊            | 5211/8000 [4:27:52<2:30:56,  3.25s/it]

Episode 5211/8000, real env return = -24.37


 65%|██████████████████████▊            | 5221/8000 [4:28:24<2:26:12,  3.16s/it]

Episode 5221/8000, real env return = -15.11


 65%|██████████████████████▉            | 5231/8000 [4:28:56<2:27:15,  3.19s/it]

Episode 5231/8000, real env return = -16.61


 66%|██████████████████████▉            | 5241/8000 [4:29:29<2:24:52,  3.15s/it]

Episode 5241/8000, real env return = -15.87


 66%|██████████████████████▉            | 5251/8000 [4:30:02<2:31:02,  3.30s/it]

Episode 5251/8000, real env return = -25.88


 66%|███████████████████████            | 5261/8000 [4:30:34<2:25:51,  3.19s/it]

Episode 5261/8000, real env return = -17.96


 66%|███████████████████████            | 5271/8000 [4:31:07<2:30:29,  3.31s/it]

Episode 5271/8000, real env return = -15.58


 66%|███████████████████████            | 5281/8000 [4:31:40<2:29:31,  3.30s/it]

Episode 5281/8000, real env return = -16.62


 66%|███████████████████████▏           | 5291/8000 [4:32:12<2:23:55,  3.19s/it]

Episode 5291/8000, real env return = -16.02


 66%|███████████████████████▏           | 5301/8000 [4:32:45<2:28:32,  3.30s/it]

Episode 5301/8000, real env return = -21.45


 66%|███████████████████████▏           | 5311/8000 [4:33:18<2:29:01,  3.33s/it]

Episode 5311/8000, real env return = -13.24


 67%|███████████████████████▎           | 5321/8000 [4:33:51<2:26:20,  3.28s/it]

Episode 5321/8000, real env return = -18.02


 67%|███████████████████████▎           | 5331/8000 [4:34:22<2:16:37,  3.07s/it]

Episode 5331/8000, real env return = -13.92


 67%|███████████████████████▎           | 5341/8000 [4:34:54<2:22:13,  3.21s/it]

Episode 5341/8000, real env return = -13.87


 67%|███████████████████████▍           | 5351/8000 [4:35:27<2:22:40,  3.23s/it]

Episode 5351/8000, real env return = -23.59


 67%|███████████████████████▍           | 5361/8000 [4:35:59<2:18:55,  3.16s/it]

Episode 5361/8000, real env return = -21.10


 67%|███████████████████████▍           | 5371/8000 [4:36:31<2:23:35,  3.28s/it]

Episode 5371/8000, real env return = -19.67


 67%|███████████████████████▌           | 5381/8000 [4:37:04<2:21:59,  3.25s/it]

Episode 5381/8000, real env return = -13.81


 67%|███████████████████████▌           | 5391/8000 [4:37:36<2:17:47,  3.17s/it]

Episode 5391/8000, real env return = -15.06


 68%|███████████████████████▋           | 5401/8000 [4:38:09<2:21:34,  3.27s/it]

Episode 5401/8000, real env return = -21.86


 68%|███████████████████████▋           | 5411/8000 [4:38:42<2:23:18,  3.32s/it]

Episode 5411/8000, real env return = -23.02


 68%|███████████████████████▋           | 5421/8000 [4:39:15<2:21:21,  3.29s/it]

Episode 5421/8000, real env return = -15.77


 68%|███████████████████████▊           | 5431/8000 [4:39:48<2:21:20,  3.30s/it]

Episode 5431/8000, real env return = -15.03


 68%|███████████████████████▊           | 5441/8000 [4:40:20<2:18:29,  3.25s/it]

Episode 5441/8000, real env return = -15.52


 68%|███████████████████████▊           | 5451/8000 [4:40:53<2:20:01,  3.30s/it]

Episode 5451/8000, real env return = -18.15


 68%|███████████████████████▉           | 5461/8000 [4:41:26<2:16:16,  3.22s/it]

Episode 5461/8000, real env return = -15.24


 68%|███████████████████████▉           | 5471/8000 [4:41:58<2:16:10,  3.23s/it]

Episode 5471/8000, real env return = -18.71


 69%|███████████████████████▉           | 5481/8000 [4:42:31<2:17:34,  3.28s/it]

Episode 5481/8000, real env return = -19.12


 69%|████████████████████████           | 5491/8000 [4:43:03<2:16:52,  3.27s/it]

Episode 5491/8000, real env return = -17.68


 69%|████████████████████████           | 5501/8000 [4:43:35<2:15:15,  3.25s/it]

Episode 5501/8000, real env return = -16.78


 69%|████████████████████████           | 5511/8000 [4:44:08<2:16:22,  3.29s/it]

Episode 5511/8000, real env return = -17.12


 69%|████████████████████████▏          | 5521/8000 [4:44:42<2:19:01,  3.36s/it]

Episode 5521/8000, real env return = -18.53


 69%|████████████████████████▏          | 5531/8000 [4:45:16<2:21:02,  3.43s/it]

Episode 5531/8000, real env return = -19.38


 69%|████████████████████████▏          | 5541/8000 [4:45:49<2:17:51,  3.36s/it]

Episode 5541/8000, real env return = -18.47


 69%|████████████████████████▎          | 5551/8000 [4:46:22<2:13:50,  3.28s/it]

Episode 5551/8000, real env return = -19.81


 70%|████████████████████████▎          | 5561/8000 [4:46:56<2:15:48,  3.34s/it]

Episode 5561/8000, real env return = -20.39


 70%|████████████████████████▎          | 5571/8000 [4:47:30<2:19:55,  3.46s/it]

Episode 5571/8000, real env return = -22.22


 70%|████████████████████████▍          | 5581/8000 [4:48:04<2:18:46,  3.44s/it]

Episode 5581/8000, real env return = -13.24


 70%|████████████████████████▍          | 5591/8000 [4:48:37<2:14:30,  3.35s/it]

Episode 5591/8000, real env return = -22.16


 70%|████████████████████████▌          | 5601/8000 [4:49:11<2:16:35,  3.42s/it]

Episode 5601/8000, real env return = -29.75


 70%|████████████████████████▌          | 5611/8000 [4:49:46<2:18:54,  3.49s/it]

Episode 5611/8000, real env return = -16.28


 70%|████████████████████████▌          | 5621/8000 [4:50:19<2:11:50,  3.33s/it]

Episode 5621/8000, real env return = -16.27


 70%|████████████████████████▋          | 5631/8000 [4:50:53<2:14:49,  3.41s/it]

Episode 5631/8000, real env return = -17.37


 71%|████████████████████████▋          | 5641/8000 [4:51:26<2:10:14,  3.31s/it]

Episode 5641/8000, real env return = -14.37


 71%|████████████████████████▋          | 5651/8000 [4:52:01<2:14:54,  3.45s/it]

Episode 5651/8000, real env return = -22.06


 71%|████████████████████████▊          | 5661/8000 [4:52:35<2:11:31,  3.37s/it]

Episode 5661/8000, real env return = -14.95


 71%|████████████████████████▊          | 5671/8000 [4:53:08<2:05:10,  3.22s/it]

Episode 5671/8000, real env return = -13.95


 71%|████████████████████████▊          | 5681/8000 [4:53:40<2:04:57,  3.23s/it]

Episode 5681/8000, real env return = -18.43


 71%|████████████████████████▉          | 5691/8000 [4:54:13<2:06:17,  3.28s/it]

Episode 5691/8000, real env return = -16.98


 71%|████████████████████████▉          | 5701/8000 [4:54:46<2:07:20,  3.32s/it]

Episode 5701/8000, real env return = -18.20


 71%|████████████████████████▉          | 5711/8000 [4:55:19<2:05:35,  3.29s/it]

Episode 5711/8000, real env return = -19.09


 72%|█████████████████████████          | 5721/8000 [4:55:51<1:59:53,  3.16s/it]

Episode 5721/8000, real env return = -16.44


 72%|█████████████████████████          | 5731/8000 [4:56:24<2:03:36,  3.27s/it]

Episode 5731/8000, real env return = -15.13


 72%|█████████████████████████          | 5741/8000 [4:56:55<1:50:58,  2.95s/it]

Episode 5741/8000, real env return = -14.27


 72%|█████████████████████████▏         | 5751/8000 [4:57:28<2:03:30,  3.30s/it]

Episode 5751/8000, real env return = -14.70


 72%|█████████████████████████▏         | 5761/8000 [4:58:01<2:04:23,  3.33s/it]

Episode 5761/8000, real env return = -21.65


 72%|█████████████████████████▏         | 5771/8000 [4:58:33<2:01:25,  3.27s/it]

Episode 5771/8000, real env return = -18.62


 72%|█████████████████████████▎         | 5781/8000 [4:59:06<2:02:12,  3.30s/it]

Episode 5781/8000, real env return = -20.43


 72%|█████████████████████████▎         | 5791/8000 [4:59:39<2:02:07,  3.32s/it]

Episode 5791/8000, real env return = -19.47


 73%|█████████████████████████▍         | 5801/8000 [5:00:12<2:01:54,  3.33s/it]

Episode 5801/8000, real env return = -18.95


 73%|█████████████████████████▍         | 5811/8000 [5:00:45<1:58:51,  3.26s/it]

Episode 5811/8000, real env return = -15.83


 73%|█████████████████████████▍         | 5821/8000 [5:01:17<1:53:59,  3.14s/it]

Episode 5821/8000, real env return = -20.16


 73%|█████████████████████████▌         | 5831/8000 [5:01:49<1:54:15,  3.16s/it]

Episode 5831/8000, real env return = -19.32


 73%|█████████████████████████▌         | 5841/8000 [5:02:21<1:53:36,  3.16s/it]

Episode 5841/8000, real env return = -18.58


 73%|█████████████████████████▌         | 5851/8000 [5:02:53<1:59:17,  3.33s/it]

Episode 5851/8000, real env return = -17.15


 73%|█████████████████████████▋         | 5861/8000 [5:03:26<1:58:21,  3.32s/it]

Episode 5861/8000, real env return = -15.86


 73%|█████████████████████████▋         | 5871/8000 [5:03:58<1:52:53,  3.18s/it]

Episode 5871/8000, real env return = -17.18


 74%|█████████████████████████▋         | 5881/8000 [5:04:32<1:57:05,  3.32s/it]

Episode 5881/8000, real env return = -17.57


 74%|█████████████████████████▊         | 5891/8000 [5:05:04<1:54:54,  3.27s/it]

Episode 5891/8000, real env return = -17.65


 74%|█████████████████████████▊         | 5901/8000 [5:05:36<1:54:25,  3.27s/it]

Episode 5901/8000, real env return = -16.12


 74%|█████████████████████████▊         | 5911/8000 [5:06:09<1:54:31,  3.29s/it]

Episode 5911/8000, real env return = -19.87


 74%|█████████████████████████▉         | 5921/8000 [5:06:42<1:56:07,  3.35s/it]

Episode 5921/8000, real env return = -18.31


 74%|█████████████████████████▉         | 5931/8000 [5:07:14<1:52:13,  3.25s/it]

Episode 5931/8000, real env return = -23.19


 74%|█████████████████████████▉         | 5941/8000 [5:07:47<1:51:19,  3.24s/it]

Episode 5941/8000, real env return = -15.10


 74%|██████████████████████████         | 5951/8000 [5:08:20<1:53:10,  3.31s/it]

Episode 5951/8000, real env return = -14.43


 75%|██████████████████████████         | 5961/8000 [5:08:53<1:51:14,  3.27s/it]

Episode 5961/8000, real env return = -21.71


 75%|██████████████████████████         | 5971/8000 [5:09:25<1:46:03,  3.14s/it]

Episode 5971/8000, real env return = -17.51


 75%|██████████████████████████▏        | 5981/8000 [5:09:57<1:47:26,  3.19s/it]

Episode 5981/8000, real env return = -17.13


 75%|██████████████████████████▏        | 5991/8000 [5:10:30<1:49:11,  3.26s/it]

Episode 5991/8000, real env return = -17.26


 75%|██████████████████████████▎        | 6001/8000 [5:11:03<1:49:24,  3.28s/it]

Episode 6001/8000, real env return = -17.02


 75%|██████████████████████████▎        | 6011/8000 [5:11:35<1:49:04,  3.29s/it]

Episode 6011/8000, real env return = -20.04


 75%|██████████████████████████▎        | 6021/8000 [5:12:07<1:47:44,  3.27s/it]

Episode 6021/8000, real env return = -16.18


 75%|██████████████████████████▍        | 6031/8000 [5:12:40<1:48:40,  3.31s/it]

Episode 6031/8000, real env return = -17.71


 76%|██████████████████████████▍        | 6041/8000 [5:13:12<1:44:21,  3.20s/it]

Episode 6041/8000, real env return = -15.90


 76%|██████████████████████████▍        | 6051/8000 [5:13:45<1:48:14,  3.33s/it]

Episode 6051/8000, real env return = -17.57


 76%|██████████████████████████▌        | 6061/8000 [5:14:18<1:44:08,  3.22s/it]

Episode 6061/8000, real env return = -18.91


 76%|██████████████████████████▌        | 6071/8000 [5:14:51<1:46:05,  3.30s/it]

Episode 6071/8000, real env return = -17.46


 76%|██████████████████████████▌        | 6081/8000 [5:15:23<1:43:50,  3.25s/it]

Episode 6081/8000, real env return = -21.93


 76%|██████████████████████████▋        | 6091/8000 [5:15:56<1:40:19,  3.15s/it]

Episode 6091/8000, real env return = -18.29


 76%|██████████████████████████▋        | 6101/8000 [5:16:28<1:40:42,  3.18s/it]

Episode 6101/8000, real env return = -18.60


 76%|██████████████████████████▋        | 6111/8000 [5:17:00<1:39:32,  3.16s/it]

Episode 6111/8000, real env return = -17.01


 77%|██████████████████████████▊        | 6121/8000 [5:17:33<1:41:20,  3.24s/it]

Episode 6121/8000, real env return = -18.54


 77%|██████████████████████████▊        | 6131/8000 [5:18:06<1:43:46,  3.33s/it]

Episode 6131/8000, real env return = -21.98


 77%|██████████████████████████▊        | 6141/8000 [5:18:38<1:40:23,  3.24s/it]

Episode 6141/8000, real env return = -16.00


 77%|██████████████████████████▉        | 6151/8000 [5:19:10<1:37:00,  3.15s/it]

Episode 6151/8000, real env return = -19.53


 77%|██████████████████████████▉        | 6161/8000 [5:19:42<1:36:57,  3.16s/it]

Episode 6161/8000, real env return = -21.73


 77%|██████████████████████████▉        | 6171/8000 [5:20:15<1:39:44,  3.27s/it]

Episode 6171/8000, real env return = -14.38


 77%|███████████████████████████        | 6181/8000 [5:20:48<1:39:27,  3.28s/it]

Episode 6181/8000, real env return = -17.76


 77%|███████████████████████████        | 6191/8000 [5:21:20<1:37:57,  3.25s/it]

Episode 6191/8000, real env return = -16.57


 78%|███████████████████████████▏       | 6201/8000 [5:21:52<1:38:17,  3.28s/it]

Episode 6201/8000, real env return = -15.07


 78%|███████████████████████████▏       | 6211/8000 [5:22:25<1:36:00,  3.22s/it]

Episode 6211/8000, real env return = -15.86


 78%|███████████████████████████▏       | 6221/8000 [5:22:56<1:25:24,  2.88s/it]

Episode 6221/8000, real env return = -16.85


 78%|███████████████████████████▎       | 6231/8000 [5:23:29<1:37:24,  3.30s/it]

Episode 6231/8000, real env return = -21.56


 78%|███████████████████████████▎       | 6241/8000 [5:24:00<1:30:52,  3.10s/it]

Episode 6241/8000, real env return = -19.91


 78%|███████████████████████████▎       | 6251/8000 [5:24:32<1:32:54,  3.19s/it]

Episode 6251/8000, real env return = -16.16


 78%|███████████████████████████▍       | 6261/8000 [5:25:04<1:35:25,  3.29s/it]

Episode 6261/8000, real env return = -15.56


 78%|███████████████████████████▍       | 6271/8000 [5:25:37<1:34:50,  3.29s/it]

Episode 6271/8000, real env return = -16.59


 79%|███████████████████████████▍       | 6281/8000 [5:26:10<1:33:33,  3.27s/it]

Episode 6281/8000, real env return = -17.83


 79%|███████████████████████████▌       | 6291/8000 [5:26:43<1:34:32,  3.32s/it]

Episode 6291/8000, real env return = -15.77


 79%|███████████████████████████▌       | 6301/8000 [5:27:16<1:34:04,  3.32s/it]

Episode 6301/8000, real env return = -14.39


 79%|███████████████████████████▌       | 6311/8000 [5:27:48<1:29:01,  3.16s/it]

Episode 6311/8000, real env return = -14.58


 79%|███████████████████████████▋       | 6321/8000 [5:28:21<1:31:02,  3.25s/it]

Episode 6321/8000, real env return = -17.99


 79%|███████████████████████████▋       | 6331/8000 [5:28:52<1:30:37,  3.26s/it]

Episode 6331/8000, real env return = -15.26


 79%|███████████████████████████▋       | 6341/8000 [5:29:25<1:30:13,  3.26s/it]

Episode 6341/8000, real env return = -15.85


 79%|███████████████████████████▊       | 6351/8000 [5:29:58<1:30:21,  3.29s/it]

Episode 6351/8000, real env return = -15.98


 80%|███████████████████████████▊       | 6361/8000 [5:30:31<1:30:57,  3.33s/it]

Episode 6361/8000, real env return = -17.86


 80%|███████████████████████████▊       | 6371/8000 [5:31:04<1:29:45,  3.31s/it]

Episode 6371/8000, real env return = -19.37


 80%|███████████████████████████▉       | 6381/8000 [5:31:37<1:29:05,  3.30s/it]

Episode 6381/8000, real env return = -15.81


 80%|███████████████████████████▉       | 6391/8000 [5:32:10<1:29:30,  3.34s/it]

Episode 6391/8000, real env return = -18.06


 80%|████████████████████████████       | 6401/8000 [5:32:43<1:28:18,  3.31s/it]

Episode 6401/8000, real env return = -12.44


 80%|████████████████████████████       | 6411/8000 [5:33:16<1:27:33,  3.31s/it]

Episode 6411/8000, real env return = -16.64


 80%|████████████████████████████       | 6421/8000 [5:33:49<1:25:57,  3.27s/it]

Episode 6421/8000, real env return = -17.54


 80%|████████████████████████████▏      | 6431/8000 [5:34:21<1:21:39,  3.12s/it]

Episode 6431/8000, real env return = -20.49


 81%|████████████████████████████▏      | 6441/8000 [5:34:53<1:25:33,  3.29s/it]

Episode 6441/8000, real env return = -14.62


 81%|████████████████████████████▏      | 6451/8000 [5:35:26<1:23:50,  3.25s/it]

Episode 6451/8000, real env return = -21.30


 81%|████████████████████████████▎      | 6461/8000 [5:35:58<1:23:58,  3.27s/it]

Episode 6461/8000, real env return = -15.87


 81%|████████████████████████████▎      | 6471/8000 [5:36:32<1:23:56,  3.29s/it]

Episode 6471/8000, real env return = -20.90


 81%|████████████████████████████▎      | 6481/8000 [5:37:04<1:23:10,  3.29s/it]

Episode 6481/8000, real env return = -16.12


 81%|████████████████████████████▍      | 6491/8000 [5:37:37<1:24:06,  3.34s/it]

Episode 6491/8000, real env return = -14.96


 81%|████████████████████████████▍      | 6501/8000 [5:38:10<1:18:15,  3.13s/it]

Episode 6501/8000, real env return = -16.64


 81%|████████████████████████████▍      | 6511/8000 [5:38:42<1:21:00,  3.26s/it]

Episode 6511/8000, real env return = -14.55


 82%|████████████████████████████▌      | 6521/8000 [5:39:15<1:21:35,  3.31s/it]

Episode 6521/8000, real env return = -23.70


 82%|████████████████████████████▌      | 6531/8000 [5:39:47<1:19:55,  3.26s/it]

Episode 6531/8000, real env return = -12.37


 82%|████████████████████████████▌      | 6541/8000 [5:40:20<1:19:43,  3.28s/it]

Episode 6541/8000, real env return = -16.61


 82%|████████████████████████████▋      | 6551/8000 [5:40:52<1:17:06,  3.19s/it]

Episode 6551/8000, real env return = -19.28


 82%|████████████████████████████▋      | 6561/8000 [5:41:25<1:18:26,  3.27s/it]

Episode 6561/8000, real env return = -20.60


 82%|████████████████████████████▋      | 6571/8000 [5:41:57<1:14:47,  3.14s/it]

Episode 6571/8000, real env return = -24.29


 82%|████████████████████████████▊      | 6581/8000 [5:42:30<1:18:35,  3.32s/it]

Episode 6581/8000, real env return = -15.17


 82%|████████████████████████████▊      | 6591/8000 [5:43:03<1:16:08,  3.24s/it]

Episode 6591/8000, real env return = -18.90


 83%|████████████████████████████▉      | 6601/8000 [5:43:34<1:15:41,  3.25s/it]

Episode 6601/8000, real env return = -17.14


 83%|████████████████████████████▉      | 6611/8000 [5:44:07<1:16:38,  3.31s/it]

Episode 6611/8000, real env return = -14.98


 83%|████████████████████████████▉      | 6621/8000 [5:44:39<1:14:57,  3.26s/it]

Episode 6621/8000, real env return = -18.57


 83%|█████████████████████████████      | 6631/8000 [5:45:11<1:14:55,  3.28s/it]

Episode 6631/8000, real env return = -15.86


 83%|█████████████████████████████      | 6641/8000 [5:45:45<1:14:31,  3.29s/it]

Episode 6641/8000, real env return = -20.78


 83%|█████████████████████████████      | 6651/8000 [5:46:18<1:14:04,  3.29s/it]

Episode 6651/8000, real env return = -14.92


 83%|█████████████████████████████▏     | 6661/8000 [5:46:50<1:14:06,  3.32s/it]

Episode 6661/8000, real env return = -15.61


 83%|█████████████████████████████▏     | 6671/8000 [5:47:23<1:12:14,  3.26s/it]

Episode 6671/8000, real env return = -14.36


 84%|█████████████████████████████▏     | 6681/8000 [5:47:56<1:12:26,  3.30s/it]

Episode 6681/8000, real env return = -17.96


 84%|█████████████████████████████▎     | 6691/8000 [5:48:29<1:10:29,  3.23s/it]

Episode 6691/8000, real env return = -18.89


 84%|█████████████████████████████▎     | 6701/8000 [5:49:01<1:08:38,  3.17s/it]

Episode 6701/8000, real env return = -14.48


 84%|█████████████████████████████▎     | 6711/8000 [5:49:33<1:09:08,  3.22s/it]

Episode 6711/8000, real env return = -14.47


 84%|█████████████████████████████▍     | 6721/8000 [5:50:06<1:10:08,  3.29s/it]

Episode 6721/8000, real env return = -14.42


 84%|█████████████████████████████▍     | 6731/8000 [5:50:39<1:10:39,  3.34s/it]

Episode 6731/8000, real env return = -13.57


 84%|█████████████████████████████▍     | 6741/8000 [5:51:11<1:06:26,  3.17s/it]

Episode 6741/8000, real env return = -14.29


 84%|█████████████████████████████▌     | 6751/8000 [5:51:44<1:08:19,  3.28s/it]

Episode 6751/8000, real env return = -14.39


 85%|█████████████████████████████▌     | 6761/8000 [5:52:15<1:01:34,  2.98s/it]

Episode 6761/8000, real env return = -14.06


 85%|█████████████████████████████▌     | 6771/8000 [5:52:48<1:07:56,  3.32s/it]

Episode 6771/8000, real env return = -13.48


 85%|█████████████████████████████▋     | 6781/8000 [5:53:21<1:06:33,  3.28s/it]

Episode 6781/8000, real env return = -12.87


 85%|█████████████████████████████▋     | 6791/8000 [5:53:52<1:04:07,  3.18s/it]

Episode 6791/8000, real env return = -12.83


 85%|█████████████████████████████▊     | 6801/8000 [5:54:26<1:05:53,  3.30s/it]

Episode 6801/8000, real env return = -13.37


 85%|█████████████████████████████▊     | 6811/8000 [5:54:58<1:04:50,  3.27s/it]

Episode 6811/8000, real env return = -14.69


 85%|█████████████████████████████▊     | 6821/8000 [5:55:31<1:05:10,  3.32s/it]

Episode 6821/8000, real env return = -13.38


 85%|█████████████████████████████▉     | 6831/8000 [5:56:02<1:03:53,  3.28s/it]

Episode 6831/8000, real env return = -12.86


 86%|█████████████████████████████▉     | 6841/8000 [5:56:35<1:04:03,  3.32s/it]

Episode 6841/8000, real env return = -14.25


 86%|█████████████████████████████▉     | 6851/8000 [5:57:06<1:00:26,  3.16s/it]

Episode 6851/8000, real env return = -20.15


 86%|██████████████████████████████     | 6861/8000 [5:57:40<1:02:49,  3.31s/it]

Episode 6861/8000, real env return = -17.42


 86%|██████████████████████████████     | 6871/8000 [5:58:12<1:00:51,  3.23s/it]

Episode 6871/8000, real env return = -13.04


 86%|██████████████████████████████     | 6881/8000 [5:58:45<1:00:05,  3.22s/it]

Episode 6881/8000, real env return = -14.95


 86%|███████████████████████████████▊     | 6891/8000 [5:59:17<59:03,  3.20s/it]

Episode 6891/8000, real env return = -27.59


 86%|██████████████████████████████▏    | 6901/8000 [5:59:50<1:00:43,  3.31s/it]

Episode 6901/8000, real env return = -12.92


 86%|███████████████████████████████▉     | 6911/8000 [6:00:22<59:35,  3.28s/it]

Episode 6911/8000, real env return = -12.66


 87%|████████████████████████████████     | 6921/8000 [6:00:55<59:11,  3.29s/it]

Episode 6921/8000, real env return = -12.25


 87%|████████████████████████████████     | 6931/8000 [6:01:28<59:00,  3.31s/it]

Episode 6931/8000, real env return = -12.78


 87%|████████████████████████████████     | 6941/8000 [6:02:01<59:08,  3.35s/it]

Episode 6941/8000, real env return = -17.45


 87%|████████████████████████████████▏    | 6951/8000 [6:02:34<57:42,  3.30s/it]

Episode 6951/8000, real env return = -13.84


 87%|████████████████████████████████▏    | 6961/8000 [6:03:06<56:29,  3.26s/it]

Episode 6961/8000, real env return = -12.03


 87%|████████████████████████████████▏    | 6971/8000 [6:03:39<56:06,  3.27s/it]

Episode 6971/8000, real env return = -14.25


 87%|████████████████████████████████▎    | 6981/8000 [6:04:11<55:38,  3.28s/it]

Episode 6981/8000, real env return = -13.04


 87%|████████████████████████████████▎    | 6991/8000 [6:04:44<55:16,  3.29s/it]

Episode 6991/8000, real env return = -13.01


 88%|████████████████████████████████▍    | 7001/8000 [6:05:17<54:42,  3.29s/it]

Episode 7001/8000, real env return = -13.88


 88%|████████████████████████████████▍    | 7011/8000 [6:05:49<49:43,  3.02s/it]

Episode 7011/8000, real env return = -15.91


 88%|████████████████████████████████▍    | 7021/8000 [6:06:21<49:47,  3.05s/it]

Episode 7021/8000, real env return = -17.19


 88%|████████████████████████████████▌    | 7031/8000 [6:06:54<53:21,  3.30s/it]

Episode 7031/8000, real env return = -20.13


 88%|████████████████████████████████▌    | 7041/8000 [6:07:26<52:39,  3.30s/it]

Episode 7041/8000, real env return = -22.84


 88%|████████████████████████████████▌    | 7051/8000 [6:07:59<51:16,  3.24s/it]

Episode 7051/8000, real env return = -12.80


 88%|████████████████████████████████▋    | 7061/8000 [6:08:32<51:53,  3.32s/it]

Episode 7061/8000, real env return = -12.62


 88%|████████████████████████████████▋    | 7071/8000 [6:09:05<50:58,  3.29s/it]

Episode 7071/8000, real env return = -20.78


 89%|████████████████████████████████▋    | 7081/8000 [6:09:37<50:32,  3.30s/it]

Episode 7081/8000, real env return = -15.38


 89%|████████████████████████████████▊    | 7091/8000 [6:10:10<50:29,  3.33s/it]

Episode 7091/8000, real env return = -14.52


 89%|████████████████████████████████▊    | 7101/8000 [6:10:43<45:49,  3.06s/it]

Episode 7101/8000, real env return = -14.26


 89%|████████████████████████████████▉    | 7111/8000 [6:11:15<47:33,  3.21s/it]

Episode 7111/8000, real env return = -18.39


 89%|████████████████████████████████▉    | 7121/8000 [6:11:45<45:00,  3.07s/it]

Episode 7121/8000, real env return = -15.73


 89%|████████████████████████████████▉    | 7131/8000 [6:12:18<47:52,  3.31s/it]

Episode 7131/8000, real env return = -11.65


 89%|█████████████████████████████████    | 7141/8000 [6:12:50<45:33,  3.18s/it]

Episode 7141/8000, real env return = -16.28


 89%|█████████████████████████████████    | 7151/8000 [6:13:23<46:02,  3.25s/it]

Episode 7151/8000, real env return = -13.82


 90%|█████████████████████████████████    | 7161/8000 [6:13:55<46:04,  3.29s/it]

Episode 7161/8000, real env return = -13.40


 90%|█████████████████████████████████▏   | 7171/8000 [6:14:28<45:19,  3.28s/it]

Episode 7171/8000, real env return = -12.37


 90%|█████████████████████████████████▏   | 7181/8000 [6:15:01<45:12,  3.31s/it]

Episode 7181/8000, real env return = -13.36


 90%|█████████████████████████████████▎   | 7191/8000 [6:15:34<43:46,  3.25s/it]

Episode 7191/8000, real env return = -13.39


 90%|█████████████████████████████████▎   | 7201/8000 [6:16:07<43:49,  3.29s/it]

Episode 7201/8000, real env return = -12.72


 90%|█████████████████████████████████▎   | 7211/8000 [6:16:40<43:02,  3.27s/it]

Episode 7211/8000, real env return = -15.02


 90%|█████████████████████████████████▍   | 7221/8000 [6:17:13<42:50,  3.30s/it]

Episode 7221/8000, real env return = -13.56


 90%|█████████████████████████████████▍   | 7231/8000 [6:17:46<42:43,  3.33s/it]

Episode 7231/8000, real env return = -14.98


 91%|█████████████████████████████████▍   | 7241/8000 [6:18:18<41:07,  3.25s/it]

Episode 7241/8000, real env return = -16.36


 91%|█████████████████████████████████▌   | 7251/8000 [6:18:51<40:42,  3.26s/it]

Episode 7251/8000, real env return = -15.19


 91%|█████████████████████████████████▌   | 7261/8000 [6:19:22<39:16,  3.19s/it]

Episode 7261/8000, real env return = -12.33


 91%|█████████████████████████████████▋   | 7271/8000 [6:19:55<39:40,  3.26s/it]

Episode 7271/8000, real env return = -12.48


 91%|█████████████████████████████████▋   | 7281/8000 [6:20:27<39:54,  3.33s/it]

Episode 7281/8000, real env return = -12.49


 91%|█████████████████████████████████▋   | 7291/8000 [6:21:00<39:22,  3.33s/it]

Episode 7291/8000, real env return = -13.91


 91%|█████████████████████████████████▊   | 7301/8000 [6:21:33<38:00,  3.26s/it]

Episode 7301/8000, real env return = -15.40


 91%|█████████████████████████████████▊   | 7311/8000 [6:22:06<37:26,  3.26s/it]

Episode 7311/8000, real env return = -19.63


 92%|█████████████████████████████████▊   | 7321/8000 [6:22:39<36:41,  3.24s/it]

Episode 7321/8000, real env return = -12.72


 92%|█████████████████████████████████▉   | 7331/8000 [6:23:11<36:24,  3.27s/it]

Episode 7331/8000, real env return = -15.70


 92%|█████████████████████████████████▉   | 7341/8000 [6:23:44<36:19,  3.31s/it]

Episode 7341/8000, real env return = -13.93


 92%|█████████████████████████████████▉   | 7351/8000 [6:24:17<36:19,  3.36s/it]

Episode 7351/8000, real env return = -16.64


 92%|██████████████████████████████████   | 7361/8000 [6:24:50<34:55,  3.28s/it]

Episode 7361/8000, real env return = -13.42


 92%|██████████████████████████████████   | 7371/8000 [6:25:23<34:43,  3.31s/it]

Episode 7371/8000, real env return = -13.75


 92%|██████████████████████████████████▏  | 7381/8000 [6:25:56<34:17,  3.32s/it]

Episode 7381/8000, real env return = -13.85


 92%|██████████████████████████████████▏  | 7391/8000 [6:26:29<33:08,  3.27s/it]

Episode 7391/8000, real env return = -15.66


 93%|██████████████████████████████████▏  | 7401/8000 [6:27:02<32:53,  3.29s/it]

Episode 7401/8000, real env return = -13.05


 93%|██████████████████████████████████▎  | 7411/8000 [6:27:35<32:13,  3.28s/it]

Episode 7411/8000, real env return = -11.40


 93%|██████████████████████████████████▎  | 7421/8000 [6:28:06<28:25,  2.95s/it]

Episode 7421/8000, real env return = -12.13


 93%|██████████████████████████████████▎  | 7431/8000 [6:28:39<30:48,  3.25s/it]

Episode 7431/8000, real env return = -14.04


 93%|██████████████████████████████████▍  | 7441/8000 [6:29:11<30:23,  3.26s/it]

Episode 7441/8000, real env return = -16.76


 93%|██████████████████████████████████▍  | 7451/8000 [6:29:44<29:29,  3.22s/it]

Episode 7451/8000, real env return = -12.58


 93%|██████████████████████████████████▌  | 7461/8000 [6:30:16<29:10,  3.25s/it]

Episode 7461/8000, real env return = -14.08


 93%|██████████████████████████████████▌  | 7471/8000 [6:30:49<28:50,  3.27s/it]

Episode 7471/8000, real env return = -14.08


 94%|██████████████████████████████████▌  | 7481/8000 [6:31:22<28:56,  3.35s/it]

Episode 7481/8000, real env return = -11.63


 94%|██████████████████████████████████▋  | 7491/8000 [6:31:55<27:51,  3.28s/it]

Episode 7491/8000, real env return = -16.12


 94%|██████████████████████████████████▋  | 7501/8000 [6:32:27<26:07,  3.14s/it]

Episode 7501/8000, real env return = -20.01


 94%|██████████████████████████████████▋  | 7511/8000 [6:33:01<26:50,  3.29s/it]

Episode 7511/8000, real env return = -16.64


 94%|██████████████████████████████████▊  | 7521/8000 [6:33:33<25:52,  3.24s/it]

Episode 7521/8000, real env return = -14.07


 94%|██████████████████████████████████▊  | 7531/8000 [6:34:05<24:06,  3.08s/it]

Episode 7531/8000, real env return = -19.60


 94%|██████████████████████████████████▉  | 7541/8000 [6:34:38<25:23,  3.32s/it]

Episode 7541/8000, real env return = -18.01


 94%|██████████████████████████████████▉  | 7551/8000 [6:35:11<24:34,  3.28s/it]

Episode 7551/8000, real env return = -14.89


 95%|██████████████████████████████████▉  | 7561/8000 [6:35:43<22:54,  3.13s/it]

Episode 7561/8000, real env return = -14.78


 95%|███████████████████████████████████  | 7571/8000 [6:36:15<23:46,  3.33s/it]

Episode 7571/8000, real env return = -15.39


 95%|███████████████████████████████████  | 7581/8000 [6:36:48<22:13,  3.18s/it]

Episode 7581/8000, real env return = -12.74


 95%|███████████████████████████████████  | 7591/8000 [6:37:20<21:53,  3.21s/it]

Episode 7591/8000, real env return = -17.75


 95%|███████████████████████████████████▏ | 7601/8000 [6:37:54<22:15,  3.35s/it]

Episode 7601/8000, real env return = -14.41


 95%|███████████████████████████████████▏ | 7611/8000 [6:38:27<21:39,  3.34s/it]

Episode 7611/8000, real env return = -10.60


 95%|███████████████████████████████████▏ | 7621/8000 [6:39:00<20:36,  3.26s/it]

Episode 7621/8000, real env return = -19.30


 95%|███████████████████████████████████▎ | 7631/8000 [6:39:33<20:14,  3.29s/it]

Episode 7631/8000, real env return = -11.34


 96%|███████████████████████████████████▎ | 7641/8000 [6:40:05<19:33,  3.27s/it]

Episode 7641/8000, real env return = -11.33


 96%|███████████████████████████████████▍ | 7651/8000 [6:40:38<19:05,  3.28s/it]

Episode 7651/8000, real env return = -11.70


 96%|███████████████████████████████████▍ | 7661/8000 [6:41:10<18:43,  3.31s/it]

Episode 7661/8000, real env return = -16.86


 96%|███████████████████████████████████▍ | 7671/8000 [6:41:43<17:38,  3.22s/it]

Episode 7671/8000, real env return = -16.12


 96%|███████████████████████████████████▌ | 7681/8000 [6:42:16<16:54,  3.18s/it]

Episode 7681/8000, real env return = -12.10


 96%|███████████████████████████████████▌ | 7691/8000 [6:42:49<16:59,  3.30s/it]

Episode 7691/8000, real env return = -11.84


 96%|███████████████████████████████████▌ | 7701/8000 [6:43:22<16:26,  3.30s/it]

Episode 7701/8000, real env return = -12.94


 96%|███████████████████████████████████▋ | 7711/8000 [6:43:55<15:56,  3.31s/it]

Episode 7711/8000, real env return = -12.58


 97%|███████████████████████████████████▋ | 7721/8000 [6:44:27<15:13,  3.27s/it]

Episode 7721/8000, real env return = -12.54


 97%|███████████████████████████████████▊ | 7731/8000 [6:45:00<14:30,  3.24s/it]

Episode 7731/8000, real env return = -13.06


 97%|███████████████████████████████████▊ | 7741/8000 [6:45:33<14:19,  3.32s/it]

Episode 7741/8000, real env return = -12.45


 97%|███████████████████████████████████▊ | 7751/8000 [6:46:05<13:30,  3.26s/it]

Episode 7751/8000, real env return = -12.89


 97%|███████████████████████████████████▉ | 7761/8000 [6:46:38<13:06,  3.29s/it]

Episode 7761/8000, real env return = -12.67


 97%|███████████████████████████████████▉ | 7771/8000 [6:47:12<12:39,  3.32s/it]

Episode 7771/8000, real env return = -13.80


 97%|███████████████████████████████████▉ | 7781/8000 [6:47:45<11:55,  3.26s/it]

Episode 7781/8000, real env return = -12.68


 97%|████████████████████████████████████ | 7791/8000 [6:48:17<11:18,  3.25s/it]

Episode 7791/8000, real env return = -16.64


 98%|████████████████████████████████████ | 7801/8000 [6:48:48<10:43,  3.24s/it]

Episode 7801/8000, real env return = -16.50


 98%|████████████████████████████████████▏| 7811/8000 [6:49:22<10:31,  3.34s/it]

Episode 7811/8000, real env return = -13.86


 98%|████████████████████████████████████▏| 7821/8000 [6:49:54<09:49,  3.29s/it]

Episode 7821/8000, real env return = -14.01


 98%|████████████████████████████████████▏| 7831/8000 [6:50:27<09:13,  3.27s/it]

Episode 7831/8000, real env return = -12.57


 98%|████████████████████████████████████▎| 7841/8000 [6:51:00<08:48,  3.32s/it]

Episode 7841/8000, real env return = -15.82


 98%|████████████████████████████████████▎| 7851/8000 [6:51:32<08:07,  3.27s/it]

Episode 7851/8000, real env return = -17.51


 98%|████████████████████████████████████▎| 7861/8000 [6:52:04<07:32,  3.25s/it]

Episode 7861/8000, real env return = -11.81


 98%|████████████████████████████████████▍| 7871/8000 [6:52:37<07:05,  3.30s/it]

Episode 7871/8000, real env return = -13.00


 99%|████████████████████████████████████▍| 7881/8000 [6:53:10<06:24,  3.23s/it]

Episode 7881/8000, real env return = -14.20


 99%|████████████████████████████████████▍| 7891/8000 [6:53:43<05:52,  3.24s/it]

Episode 7891/8000, real env return = -17.35


 99%|████████████████████████████████████▌| 7901/8000 [6:54:17<05:30,  3.34s/it]

Episode 7901/8000, real env return = -17.15


 99%|████████████████████████████████████▌| 7911/8000 [6:54:49<04:35,  3.10s/it]

Episode 7911/8000, real env return = -12.29


 99%|████████████████████████████████████▋| 7921/8000 [6:55:22<04:22,  3.32s/it]

Episode 7921/8000, real env return = -12.52


 99%|████████████████████████████████████▋| 7931/8000 [6:55:54<03:42,  3.22s/it]

Episode 7931/8000, real env return = -12.19


 99%|████████████████████████████████████▋| 7941/8000 [6:56:27<03:12,  3.26s/it]

Episode 7941/8000, real env return = -11.58


 99%|████████████████████████████████████▊| 7951/8000 [6:57:00<02:41,  3.30s/it]

Episode 7951/8000, real env return = -17.00


100%|████████████████████████████████████▊| 7961/8000 [6:57:33<02:09,  3.32s/it]

Episode 7961/8000, real env return = -19.88


100%|████████████████████████████████████▊| 7971/8000 [6:58:05<01:34,  3.27s/it]

Episode 7971/8000, real env return = -19.58


100%|████████████████████████████████████▉| 7981/8000 [6:58:39<01:03,  3.33s/it]

Episode 7981/8000, real env return = -20.99


100%|████████████████████████████████████▉| 7991/8000 [6:59:11<00:29,  3.27s/it]

Episode 7991/8000, real env return = -15.72


100%|█████████████████████████████████████| 8000/8000 [6:59:41<00:00,  3.15s/it]


Training finished.


In [12]:
train5 = main()

Using cuda device


  0%|                                                  | 0/8000 [00:00<?, ?it/s]

Episode 1/8000, real env return = -103.58


  0%|                                       | 11/8000 [00:13<3:36:45,  1.63s/it]

Episode 11/8000, real env return = -93.20


  0%|                                       | 21/8000 [00:33<4:27:00,  2.01s/it]

Episode 21/8000, real env return = -76.55


  0%|▏                                      | 31/8000 [00:48<3:23:41,  1.53s/it]

Episode 31/8000, real env return = -93.79


  1%|▏                                      | 41/8000 [01:09<5:06:04,  2.31s/it]

Episode 41/8000, real env return = -84.88


  1%|▏                                      | 51/8000 [01:30<4:36:31,  2.09s/it]

Episode 51/8000, real env return = -109.03


  1%|▎                                      | 61/8000 [01:50<4:19:04,  1.96s/it]

Episode 61/8000, real env return = -100.40


  1%|▎                                      | 71/8000 [02:08<4:10:55,  1.90s/it]

Episode 71/8000, real env return = -79.82


  1%|▍                                      | 81/8000 [02:32<5:24:49,  2.46s/it]

Episode 81/8000, real env return = -154.81


  1%|▍                                      | 91/8000 [02:55<5:14:52,  2.39s/it]

Episode 91/8000, real env return = -80.96


  1%|▍                                     | 101/8000 [03:16<5:08:31,  2.34s/it]

Episode 101/8000, real env return = -58.84


  1%|▌                                     | 111/8000 [03:40<5:27:19,  2.49s/it]

Episode 111/8000, real env return = -123.54


  2%|▌                                     | 121/8000 [04:05<5:35:54,  2.56s/it]

Episode 121/8000, real env return = -47.40


  2%|▌                                     | 131/8000 [04:29<4:45:23,  2.18s/it]

Episode 131/8000, real env return = -54.95


  2%|▋                                     | 141/8000 [04:54<5:13:52,  2.40s/it]

Episode 141/8000, real env return = -75.58


  2%|▋                                     | 151/8000 [05:19<5:45:26,  2.64s/it]

Episode 151/8000, real env return = -130.59


  2%|▊                                     | 161/8000 [05:46<5:42:18,  2.62s/it]

Episode 161/8000, real env return = -132.57


  2%|▊                                     | 171/8000 [06:10<5:02:22,  2.32s/it]

Episode 171/8000, real env return = -127.93


  2%|▊                                     | 181/8000 [06:37<5:34:58,  2.57s/it]

Episode 181/8000, real env return = -123.69


  2%|▉                                     | 191/8000 [07:02<5:46:30,  2.66s/it]

Episode 191/8000, real env return = -112.61


  3%|▉                                     | 201/8000 [07:31<6:14:57,  2.88s/it]

Episode 201/8000, real env return = -109.91


  3%|█                                     | 211/8000 [07:56<5:13:43,  2.42s/it]

Episode 211/8000, real env return = -106.49


  3%|█                                     | 221/8000 [08:23<5:52:39,  2.72s/it]

Episode 221/8000, real env return = -236.75


  3%|█                                     | 231/8000 [08:50<5:40:21,  2.63s/it]

Episode 231/8000, real env return = -231.20


  3%|█▏                                    | 241/8000 [09:13<4:59:57,  2.32s/it]

Episode 241/8000, real env return = -129.82


  3%|█▏                                    | 251/8000 [09:39<5:50:50,  2.72s/it]

Episode 251/8000, real env return = -104.90


  3%|█▏                                    | 261/8000 [10:09<6:33:33,  3.05s/it]

Episode 261/8000, real env return = -107.58


  3%|█▎                                    | 271/8000 [10:37<5:37:33,  2.62s/it]

Episode 271/8000, real env return = -75.23


  4%|█▎                                    | 281/8000 [11:05<6:00:59,  2.81s/it]

Episode 281/8000, real env return = -103.15


  4%|█▍                                    | 291/8000 [11:32<5:53:18,  2.75s/it]

Episode 291/8000, real env return = -78.93


  4%|█▍                                    | 301/8000 [11:59<6:08:00,  2.87s/it]

Episode 301/8000, real env return = -69.89


  4%|█▍                                    | 311/8000 [12:26<6:07:54,  2.87s/it]

Episode 311/8000, real env return = -94.49


  4%|█▌                                    | 321/8000 [12:53<5:36:09,  2.63s/it]

Episode 321/8000, real env return = -84.15


  4%|█▌                                    | 331/8000 [13:22<6:12:41,  2.92s/it]

Episode 331/8000, real env return = -97.49


  4%|█▌                                    | 341/8000 [13:51<5:54:06,  2.77s/it]

Episode 341/8000, real env return = -103.99


  4%|█▋                                    | 351/8000 [14:20<5:56:58,  2.80s/it]

Episode 351/8000, real env return = -95.27


  5%|█▋                                    | 361/8000 [14:50<6:38:02,  3.13s/it]

Episode 361/8000, real env return = -88.78


  5%|█▊                                    | 371/8000 [15:22<6:50:14,  3.23s/it]

Episode 371/8000, real env return = -112.10


  5%|█▊                                    | 381/8000 [15:49<5:37:45,  2.66s/it]

Episode 381/8000, real env return = -117.99


  5%|█▊                                    | 391/8000 [16:19<6:34:55,  3.11s/it]

Episode 391/8000, real env return = -119.97


  5%|█▉                                    | 401/8000 [16:48<5:58:39,  2.83s/it]

Episode 401/8000, real env return = -104.82


  5%|█▉                                    | 411/8000 [17:17<5:51:06,  2.78s/it]

Episode 411/8000, real env return = -89.80


  5%|█▉                                    | 421/8000 [17:46<6:07:47,  2.91s/it]

Episode 421/8000, real env return = -94.25


  5%|██                                    | 431/8000 [18:16<6:12:45,  2.95s/it]

Episode 431/8000, real env return = -90.44


  6%|██                                    | 441/8000 [18:46<6:15:43,  2.98s/it]

Episode 441/8000, real env return = -85.42


  6%|██▏                                   | 451/8000 [19:15<6:17:06,  3.00s/it]

Episode 451/8000, real env return = -60.83


  6%|██▏                                   | 461/8000 [19:46<6:28:33,  3.09s/it]

Episode 461/8000, real env return = -68.18


  6%|██▏                                   | 471/8000 [20:16<6:21:54,  3.04s/it]

Episode 471/8000, real env return = -71.59


  6%|██▎                                   | 481/8000 [20:45<6:06:49,  2.93s/it]

Episode 481/8000, real env return = -71.00


  6%|██▎                                   | 491/8000 [21:15<6:23:04,  3.06s/it]

Episode 491/8000, real env return = -78.28


  6%|██▍                                   | 501/8000 [21:45<6:25:29,  3.08s/it]

Episode 501/8000, real env return = -67.21


  6%|██▍                                   | 511/8000 [22:16<6:42:01,  3.22s/it]

Episode 511/8000, real env return = -69.29


  7%|██▍                                   | 521/8000 [22:46<6:27:08,  3.11s/it]

Episode 521/8000, real env return = -68.54


  7%|██▌                                   | 531/8000 [23:17<6:33:33,  3.16s/it]

Episode 531/8000, real env return = -67.58


  7%|██▌                                   | 541/8000 [23:48<6:23:23,  3.08s/it]

Episode 541/8000, real env return = -45.22


  7%|██▌                                   | 551/8000 [24:17<6:00:06,  2.90s/it]

Episode 551/8000, real env return = -56.77


  7%|██▋                                   | 561/8000 [24:49<6:42:04,  3.24s/it]

Episode 561/8000, real env return = -70.39


  7%|██▋                                   | 571/8000 [25:20<6:38:37,  3.22s/it]

Episode 571/8000, real env return = -55.73


  7%|██▊                                   | 581/8000 [25:51<6:11:22,  3.00s/it]

Episode 581/8000, real env return = -42.94


  7%|██▊                                   | 591/8000 [26:22<6:36:02,  3.21s/it]

Episode 591/8000, real env return = -34.97


  8%|██▊                                   | 601/8000 [26:50<5:31:20,  2.69s/it]

Episode 601/8000, real env return = -108.02


  8%|██▉                                   | 611/8000 [27:20<5:55:32,  2.89s/it]

Episode 611/8000, real env return = -113.33


  8%|██▉                                   | 621/8000 [27:51<6:00:26,  2.93s/it]

Episode 621/8000, real env return = -35.25


  8%|██▉                                   | 631/8000 [28:22<6:29:42,  3.17s/it]

Episode 631/8000, real env return = -29.26


  8%|███                                   | 641/8000 [28:53<6:21:16,  3.11s/it]

Episode 641/8000, real env return = -27.41


  8%|███                                   | 651/8000 [29:23<6:29:23,  3.18s/it]

Episode 651/8000, real env return = -47.04


  8%|███▏                                  | 661/8000 [29:53<6:15:02,  3.07s/it]

Episode 661/8000, real env return = -38.35


  8%|███▏                                  | 671/8000 [30:25<6:28:23,  3.18s/it]

Episode 671/8000, real env return = -24.74


  9%|███▏                                  | 681/8000 [30:55<5:38:44,  2.78s/it]

Episode 681/8000, real env return = -31.93


  9%|███▎                                  | 691/8000 [31:27<6:25:54,  3.17s/it]

Episode 691/8000, real env return = -30.89


  9%|███▎                                  | 701/8000 [31:57<6:08:41,  3.03s/it]

Episode 701/8000, real env return = -38.60


  9%|███▍                                  | 711/8000 [32:27<5:57:07,  2.94s/it]

Episode 711/8000, real env return = -31.40


  9%|███▍                                  | 721/8000 [32:58<6:12:44,  3.07s/it]

Episode 721/8000, real env return = -29.79


  9%|███▍                                  | 731/8000 [33:26<5:37:05,  2.78s/it]

Episode 731/8000, real env return = -28.03


  9%|███▌                                  | 741/8000 [33:56<6:11:28,  3.07s/it]

Episode 741/8000, real env return = -22.77


  9%|███▌                                  | 751/8000 [34:27<6:25:40,  3.19s/it]

Episode 751/8000, real env return = -35.52


 10%|███▌                                  | 761/8000 [34:57<6:11:11,  3.08s/it]

Episode 761/8000, real env return = -28.33


 10%|███▋                                  | 771/8000 [35:29<6:11:14,  3.08s/it]

Episode 771/8000, real env return = -26.64


 10%|███▋                                  | 781/8000 [35:59<5:56:00,  2.96s/it]

Episode 781/8000, real env return = -30.39


 10%|███▊                                  | 791/8000 [36:30<6:24:06,  3.20s/it]

Episode 791/8000, real env return = -29.04


 10%|███▊                                  | 801/8000 [37:01<6:12:56,  3.11s/it]

Episode 801/8000, real env return = -32.55


 10%|███▊                                  | 811/8000 [37:31<5:53:38,  2.95s/it]

Episode 811/8000, real env return = -28.00


 10%|███▉                                  | 821/8000 [38:01<6:11:15,  3.10s/it]

Episode 821/8000, real env return = -35.16


 10%|███▉                                  | 831/8000 [38:32<6:21:40,  3.19s/it]

Episode 831/8000, real env return = -30.29


 11%|███▉                                  | 841/8000 [39:03<5:50:55,  2.94s/it]

Episode 841/8000, real env return = -128.05


 11%|████                                  | 851/8000 [39:35<6:11:49,  3.12s/it]

Episode 851/8000, real env return = -32.11


 11%|████                                  | 861/8000 [40:05<5:36:55,  2.83s/it]

Episode 861/8000, real env return = -25.47


 11%|████▏                                 | 871/8000 [40:37<6:12:09,  3.13s/it]

Episode 871/8000, real env return = -37.66


 11%|████▏                                 | 881/8000 [41:09<6:29:19,  3.28s/it]

Episode 881/8000, real env return = -25.49


 11%|████▏                                 | 891/8000 [41:41<6:21:43,  3.22s/it]

Episode 891/8000, real env return = -37.04


 11%|████▎                                 | 901/8000 [42:12<5:42:16,  2.89s/it]

Episode 901/8000, real env return = -35.56


 11%|████▎                                 | 911/8000 [42:42<5:45:22,  2.92s/it]

Episode 911/8000, real env return = -38.72


 12%|████▎                                 | 921/8000 [43:13<5:54:36,  3.01s/it]

Episode 921/8000, real env return = -29.24


 12%|████▍                                 | 931/8000 [43:42<5:42:58,  2.91s/it]

Episode 931/8000, real env return = -31.64


 12%|████▍                                 | 941/8000 [44:15<6:25:34,  3.28s/it]

Episode 941/8000, real env return = -29.49


 12%|████▌                                 | 951/8000 [44:46<6:13:46,  3.18s/it]

Episode 951/8000, real env return = -36.40


 12%|████▌                                 | 961/8000 [45:15<5:41:54,  2.91s/it]

Episode 961/8000, real env return = -26.34


 12%|████▌                                 | 971/8000 [45:47<6:18:09,  3.23s/it]

Episode 971/8000, real env return = -32.70


 12%|████▋                                 | 981/8000 [46:19<6:10:27,  3.17s/it]

Episode 981/8000, real env return = -27.72


 12%|████▋                                 | 991/8000 [46:50<6:15:26,  3.21s/it]

Episode 991/8000, real env return = -35.47


 13%|████▋                                | 1001/8000 [47:21<6:10:16,  3.17s/it]

Episode 1001/8000, real env return = -26.55


 13%|████▋                                | 1011/8000 [47:53<6:11:41,  3.19s/it]

Episode 1011/8000, real env return = -24.74


 13%|████▋                                | 1021/8000 [48:23<5:46:21,  2.98s/it]

Episode 1021/8000, real env return = -25.94


 13%|████▊                                | 1031/8000 [48:55<6:15:02,  3.23s/it]

Episode 1031/8000, real env return = -26.13


 13%|████▊                                | 1041/8000 [49:27<5:57:10,  3.08s/it]

Episode 1041/8000, real env return = -26.37


 13%|████▊                                | 1051/8000 [49:58<6:05:00,  3.15s/it]

Episode 1051/8000, real env return = -25.79


 13%|████▉                                | 1061/8000 [50:30<6:16:08,  3.25s/it]

Episode 1061/8000, real env return = -24.47


 13%|████▉                                | 1071/8000 [51:02<5:54:46,  3.07s/it]

Episode 1071/8000, real env return = -28.54


 14%|████▉                                | 1081/8000 [51:28<5:27:16,  2.84s/it]

Episode 1081/8000, real env return = -29.25


 14%|█████                                | 1091/8000 [52:00<6:03:47,  3.16s/it]

Episode 1091/8000, real env return = -24.21


 14%|█████                                | 1101/8000 [52:32<6:15:53,  3.27s/it]

Episode 1101/8000, real env return = -23.46


 14%|█████▏                               | 1111/8000 [53:03<6:01:46,  3.15s/it]

Episode 1111/8000, real env return = -24.70


 14%|█████▏                               | 1121/8000 [53:34<5:55:51,  3.10s/it]

Episode 1121/8000, real env return = -29.78


 14%|█████▏                               | 1131/8000 [54:06<6:10:15,  3.23s/it]

Episode 1131/8000, real env return = -40.98


 14%|█████▎                               | 1141/8000 [54:38<6:03:38,  3.18s/it]

Episode 1141/8000, real env return = -21.25


 14%|█████▎                               | 1151/8000 [55:09<5:55:13,  3.11s/it]

Episode 1151/8000, real env return = -26.61


 15%|█████▎                               | 1161/8000 [55:41<5:52:06,  3.09s/it]

Episode 1161/8000, real env return = -26.58


 15%|█████▍                               | 1171/8000 [56:13<6:07:52,  3.23s/it]

Episode 1171/8000, real env return = -23.41


 15%|█████▍                               | 1181/8000 [56:44<5:56:25,  3.14s/it]

Episode 1181/8000, real env return = -25.74


 15%|█████▌                               | 1191/8000 [57:15<5:50:46,  3.09s/it]

Episode 1191/8000, real env return = -23.03


 15%|█████▌                               | 1201/8000 [57:46<5:52:50,  3.11s/it]

Episode 1201/8000, real env return = -23.52


 15%|█████▌                               | 1211/8000 [58:17<5:58:32,  3.17s/it]

Episode 1211/8000, real env return = -20.46


 15%|█████▋                               | 1221/8000 [58:49<5:42:14,  3.03s/it]

Episode 1221/8000, real env return = -21.42


 15%|█████▋                               | 1231/8000 [59:20<5:55:58,  3.16s/it]

Episode 1231/8000, real env return = -25.18


 16%|█████▋                               | 1241/8000 [59:51<5:37:13,  2.99s/it]

Episode 1241/8000, real env return = -23.26


 16%|█████▍                             | 1251/8000 [1:00:23<6:00:03,  3.20s/it]

Episode 1251/8000, real env return = -23.01


 16%|█████▌                             | 1261/8000 [1:00:56<5:59:15,  3.20s/it]

Episode 1261/8000, real env return = -24.12


 16%|█████▌                             | 1271/8000 [1:01:27<5:33:12,  2.97s/it]

Episode 1271/8000, real env return = -24.71


 16%|█████▌                             | 1281/8000 [1:01:55<5:39:45,  3.03s/it]

Episode 1281/8000, real env return = -21.11


 16%|█████▋                             | 1291/8000 [1:02:27<5:50:09,  3.13s/it]

Episode 1291/8000, real env return = -25.35


 16%|█████▋                             | 1301/8000 [1:02:57<5:43:24,  3.08s/it]

Episode 1301/8000, real env return = -24.56


 16%|█████▋                             | 1311/8000 [1:03:28<5:56:24,  3.20s/it]

Episode 1311/8000, real env return = -23.19


 17%|█████▊                             | 1321/8000 [1:03:58<5:23:15,  2.90s/it]

Episode 1321/8000, real env return = -30.30


 17%|█████▊                             | 1331/8000 [1:04:30<5:51:07,  3.16s/it]

Episode 1331/8000, real env return = -24.62


 17%|█████▊                             | 1341/8000 [1:05:00<5:50:42,  3.16s/it]

Episode 1341/8000, real env return = -103.62


 17%|█████▉                             | 1351/8000 [1:05:31<5:36:15,  3.03s/it]

Episode 1351/8000, real env return = -36.22


 17%|█████▉                             | 1361/8000 [1:06:03<5:56:02,  3.22s/it]

Episode 1361/8000, real env return = -29.25


 17%|█████▉                             | 1371/8000 [1:06:35<5:57:26,  3.24s/it]

Episode 1371/8000, real env return = -31.25


 17%|██████                             | 1381/8000 [1:07:07<5:46:53,  3.14s/it]

Episode 1381/8000, real env return = -26.02


 17%|██████                             | 1391/8000 [1:07:38<5:55:00,  3.22s/it]

Episode 1391/8000, real env return = -26.24


 18%|██████▏                            | 1401/8000 [1:08:11<5:59:50,  3.27s/it]

Episode 1401/8000, real env return = -30.36


 18%|██████▏                            | 1411/8000 [1:08:42<5:31:02,  3.01s/it]

Episode 1411/8000, real env return = -24.15


 18%|██████▏                            | 1421/8000 [1:09:14<5:55:53,  3.25s/it]

Episode 1421/8000, real env return = -22.96


 18%|██████▎                            | 1431/8000 [1:09:46<6:00:43,  3.29s/it]

Episode 1431/8000, real env return = -24.24


 18%|██████▎                            | 1441/8000 [1:10:17<5:41:41,  3.13s/it]

Episode 1441/8000, real env return = -26.38


 18%|██████▎                            | 1451/8000 [1:10:49<5:59:49,  3.30s/it]

Episode 1451/8000, real env return = -31.02


 18%|██████▍                            | 1461/8000 [1:11:22<5:56:33,  3.27s/it]

Episode 1461/8000, real env return = -29.69


 18%|██████▍                            | 1471/8000 [1:11:52<5:34:32,  3.07s/it]

Episode 1471/8000, real env return = -30.34


 19%|██████▍                            | 1481/8000 [1:12:25<5:53:12,  3.25s/it]

Episode 1481/8000, real env return = -24.33


 19%|██████▌                            | 1491/8000 [1:12:56<5:42:57,  3.16s/it]

Episode 1491/8000, real env return = -25.32


 19%|██████▌                            | 1501/8000 [1:13:27<5:35:56,  3.10s/it]

Episode 1501/8000, real env return = -23.85


 19%|██████▌                            | 1511/8000 [1:14:00<5:51:40,  3.25s/it]

Episode 1511/8000, real env return = -21.09


 19%|██████▋                            | 1521/8000 [1:14:33<5:56:04,  3.30s/it]

Episode 1521/8000, real env return = -29.57


 19%|██████▋                            | 1531/8000 [1:15:04<5:39:48,  3.15s/it]

Episode 1531/8000, real env return = -27.85


 19%|██████▋                            | 1541/8000 [1:15:35<5:32:18,  3.09s/it]

Episode 1541/8000, real env return = -28.51


 19%|██████▊                            | 1551/8000 [1:16:08<5:51:32,  3.27s/it]

Episode 1551/8000, real env return = -27.57


 20%|██████▊                            | 1561/8000 [1:16:40<5:33:30,  3.11s/it]

Episode 1561/8000, real env return = -27.60


 20%|██████▊                            | 1571/8000 [1:17:11<5:44:11,  3.21s/it]

Episode 1571/8000, real env return = -25.55


 20%|██████▉                            | 1581/8000 [1:17:43<5:47:19,  3.25s/it]

Episode 1581/8000, real env return = -22.91


 20%|██████▉                            | 1591/8000 [1:18:15<5:44:37,  3.23s/it]

Episode 1591/8000, real env return = -27.11


 20%|███████                            | 1601/8000 [1:18:47<5:30:26,  3.10s/it]

Episode 1601/8000, real env return = -23.65


 20%|███████                            | 1611/8000 [1:19:18<5:39:24,  3.19s/it]

Episode 1611/8000, real env return = -22.18


 20%|███████                            | 1621/8000 [1:19:50<5:35:29,  3.16s/it]

Episode 1621/8000, real env return = -22.79


 20%|███████▏                           | 1631/8000 [1:20:22<5:43:50,  3.24s/it]

Episode 1631/8000, real env return = -16.16


 21%|███████▏                           | 1641/8000 [1:20:54<5:42:26,  3.23s/it]

Episode 1641/8000, real env return = -24.95


 21%|███████▏                           | 1651/8000 [1:21:25<5:34:01,  3.16s/it]

Episode 1651/8000, real env return = -24.74


 21%|███████▎                           | 1661/8000 [1:21:56<5:28:37,  3.11s/it]

Episode 1661/8000, real env return = -21.07


 21%|███████▎                           | 1671/8000 [1:22:28<5:33:37,  3.16s/it]

Episode 1671/8000, real env return = -25.68


 21%|███████▎                           | 1681/8000 [1:22:59<5:28:56,  3.12s/it]

Episode 1681/8000, real env return = -20.88


 21%|███████▍                           | 1691/8000 [1:23:31<5:43:20,  3.27s/it]

Episode 1691/8000, real env return = -20.68


 21%|███████▍                           | 1701/8000 [1:24:02<5:25:18,  3.10s/it]

Episode 1701/8000, real env return = -20.54


 21%|███████▍                           | 1711/8000 [1:24:35<5:35:59,  3.21s/it]

Episode 1711/8000, real env return = -24.62


 22%|███████▌                           | 1721/8000 [1:25:06<5:29:29,  3.15s/it]

Episode 1721/8000, real env return = -21.57


 22%|███████▌                           | 1731/8000 [1:25:39<5:38:27,  3.24s/it]

Episode 1731/8000, real env return = -21.09


 22%|███████▌                           | 1741/8000 [1:26:11<5:39:04,  3.25s/it]

Episode 1741/8000, real env return = -24.53


 22%|███████▋                           | 1751/8000 [1:26:42<5:37:38,  3.24s/it]

Episode 1751/8000, real env return = -28.52


 22%|███████▋                           | 1761/8000 [1:27:14<5:35:25,  3.23s/it]

Episode 1761/8000, real env return = -26.87


 22%|███████▋                           | 1771/8000 [1:27:45<5:03:08,  2.92s/it]

Episode 1771/8000, real env return = -21.77


 22%|███████▊                           | 1781/8000 [1:28:13<4:10:58,  2.42s/it]

Episode 1781/8000, real env return = -123.85


 22%|███████▊                           | 1791/8000 [1:28:42<5:02:34,  2.92s/it]

Episode 1791/8000, real env return = -23.49


 23%|███████▉                           | 1801/8000 [1:29:13<5:18:33,  3.08s/it]

Episode 1801/8000, real env return = -22.91


 23%|███████▉                           | 1811/8000 [1:29:44<5:16:34,  3.07s/it]

Episode 1811/8000, real env return = -23.03


 23%|███████▉                           | 1821/8000 [1:30:15<5:26:02,  3.17s/it]

Episode 1821/8000, real env return = -40.75


 23%|████████                           | 1831/8000 [1:30:46<5:05:22,  2.97s/it]

Episode 1831/8000, real env return = -25.88


 23%|████████                           | 1841/8000 [1:31:17<5:10:04,  3.02s/it]

Episode 1841/8000, real env return = -26.90


 23%|████████                           | 1851/8000 [1:31:48<5:17:38,  3.10s/it]

Episode 1851/8000, real env return = -23.66


 23%|████████▏                          | 1861/8000 [1:32:19<5:30:19,  3.23s/it]

Episode 1861/8000, real env return = -24.17


 23%|████████▏                          | 1871/8000 [1:32:51<5:17:46,  3.11s/it]

Episode 1871/8000, real env return = -21.71


 24%|████████▏                          | 1881/8000 [1:33:22<5:20:34,  3.14s/it]

Episode 1881/8000, real env return = -30.91


 24%|████████▎                          | 1891/8000 [1:33:54<5:17:22,  3.12s/it]

Episode 1891/8000, real env return = -22.55


 24%|████████▎                          | 1901/8000 [1:34:26<5:15:51,  3.11s/it]

Episode 1901/8000, real env return = -23.34


 24%|████████▎                          | 1911/8000 [1:34:55<4:34:59,  2.71s/it]

Episode 1911/8000, real env return = -24.53


 24%|████████▍                          | 1921/8000 [1:35:28<5:33:24,  3.29s/it]

Episode 1921/8000, real env return = -22.65


 24%|████████▍                          | 1931/8000 [1:35:59<5:25:02,  3.21s/it]

Episode 1931/8000, real env return = -23.17


 24%|████████▍                          | 1941/8000 [1:36:31<5:25:16,  3.22s/it]

Episode 1941/8000, real env return = -20.62


 24%|████████▌                          | 1951/8000 [1:37:03<5:17:39,  3.15s/it]

Episode 1951/8000, real env return = -29.60


 25%|████████▌                          | 1961/8000 [1:37:35<5:31:00,  3.29s/it]

Episode 1961/8000, real env return = -21.13


 25%|████████▌                          | 1971/8000 [1:38:06<5:12:54,  3.11s/it]

Episode 1971/8000, real env return = -23.05


 25%|████████▋                          | 1981/8000 [1:38:38<5:20:08,  3.19s/it]

Episode 1981/8000, real env return = -20.75


 25%|████████▋                          | 1991/8000 [1:39:10<5:21:36,  3.21s/it]

Episode 1991/8000, real env return = -23.15


 25%|████████▊                          | 2001/8000 [1:39:42<5:21:48,  3.22s/it]

Episode 2001/8000, real env return = -28.93


 25%|████████▊                          | 2011/8000 [1:40:15<5:30:19,  3.31s/it]

Episode 2011/8000, real env return = -25.29


 25%|████████▊                          | 2021/8000 [1:40:47<5:22:42,  3.24s/it]

Episode 2021/8000, real env return = -23.15


 25%|████████▉                          | 2031/8000 [1:41:20<5:23:05,  3.25s/it]

Episode 2031/8000, real env return = -22.64


 26%|████████▉                          | 2041/8000 [1:41:51<5:13:01,  3.15s/it]

Episode 2041/8000, real env return = -21.87


 26%|████████▉                          | 2051/8000 [1:42:22<5:02:28,  3.05s/it]

Episode 2051/8000, real env return = -25.48


 26%|█████████                          | 2061/8000 [1:42:54<5:06:50,  3.10s/it]

Episode 2061/8000, real env return = -19.32


 26%|█████████                          | 2071/8000 [1:43:26<5:12:24,  3.16s/it]

Episode 2071/8000, real env return = -24.52


 26%|█████████                          | 2081/8000 [1:43:57<5:11:53,  3.16s/it]

Episode 2081/8000, real env return = -32.52


 26%|█████████▏                         | 2091/8000 [1:44:29<5:10:17,  3.15s/it]

Episode 2091/8000, real env return = -23.81


 26%|█████████▏                         | 2101/8000 [1:45:01<5:18:59,  3.24s/it]

Episode 2101/8000, real env return = -28.25


 26%|█████████▏                         | 2111/8000 [1:45:33<5:12:23,  3.18s/it]

Episode 2111/8000, real env return = -23.38


 27%|█████████▎                         | 2121/8000 [1:46:04<5:10:19,  3.17s/it]

Episode 2121/8000, real env return = -26.36


 27%|█████████▎                         | 2131/8000 [1:46:36<5:17:17,  3.24s/it]

Episode 2131/8000, real env return = -23.37


 27%|█████████▎                         | 2141/8000 [1:47:08<5:16:44,  3.24s/it]

Episode 2141/8000, real env return = -26.08


 27%|█████████▍                         | 2151/8000 [1:47:40<5:11:07,  3.19s/it]

Episode 2151/8000, real env return = -23.89


 27%|█████████▍                         | 2161/8000 [1:48:12<5:14:41,  3.23s/it]

Episode 2161/8000, real env return = -16.88


 27%|█████████▍                         | 2171/8000 [1:48:44<5:12:22,  3.22s/it]

Episode 2171/8000, real env return = -24.92


 27%|█████████▌                         | 2181/8000 [1:49:16<5:07:32,  3.17s/it]

Episode 2181/8000, real env return = -27.27


 27%|█████████▌                         | 2191/8000 [1:49:48<5:08:00,  3.18s/it]

Episode 2191/8000, real env return = -25.81


 28%|█████████▋                         | 2201/8000 [1:50:21<5:08:38,  3.19s/it]

Episode 2201/8000, real env return = -26.44


 28%|█████████▋                         | 2211/8000 [1:50:52<5:02:05,  3.13s/it]

Episode 2211/8000, real env return = -24.87


 28%|█████████▋                         | 2221/8000 [1:51:23<4:59:49,  3.11s/it]

Episode 2221/8000, real env return = -24.73


 28%|█████████▊                         | 2231/8000 [1:51:54<4:59:55,  3.12s/it]

Episode 2231/8000, real env return = -30.46


 28%|█████████▊                         | 2241/8000 [1:52:26<4:47:30,  3.00s/it]

Episode 2241/8000, real env return = -26.26


 28%|█████████▊                         | 2251/8000 [1:52:58<5:08:14,  3.22s/it]

Episode 2251/8000, real env return = -23.33


 28%|█████████▉                         | 2261/8000 [1:53:30<5:02:31,  3.16s/it]

Episode 2261/8000, real env return = -23.58


 28%|█████████▉                         | 2271/8000 [1:54:02<5:10:31,  3.25s/it]

Episode 2271/8000, real env return = -23.56


 29%|█████████▉                         | 2281/8000 [1:54:33<5:03:15,  3.18s/it]

Episode 2281/8000, real env return = -22.68


 29%|██████████                         | 2291/8000 [1:55:05<4:48:03,  3.03s/it]

Episode 2291/8000, real env return = -22.28


 29%|██████████                         | 2301/8000 [1:55:37<5:10:40,  3.27s/it]

Episode 2301/8000, real env return = -22.94


 29%|██████████                         | 2311/8000 [1:56:08<4:38:41,  2.94s/it]

Episode 2311/8000, real env return = -29.36


 29%|██████████▏                        | 2321/8000 [1:56:40<5:00:54,  3.18s/it]

Episode 2321/8000, real env return = -28.35


 29%|██████████▏                        | 2331/8000 [1:57:12<4:53:37,  3.11s/it]

Episode 2331/8000, real env return = -28.31


 29%|██████████▏                        | 2341/8000 [1:57:44<5:01:29,  3.20s/it]

Episode 2341/8000, real env return = -23.56


 29%|██████████▎                        | 2351/8000 [1:58:17<5:11:26,  3.31s/it]

Episode 2351/8000, real env return = -33.82


 30%|██████████▎                        | 2361/8000 [1:58:49<5:00:20,  3.20s/it]

Episode 2361/8000, real env return = -26.04


 30%|██████████▎                        | 2371/8000 [1:59:21<5:06:24,  3.27s/it]

Episode 2371/8000, real env return = -26.14


 30%|██████████▍                        | 2381/8000 [1:59:53<5:03:20,  3.24s/it]

Episode 2381/8000, real env return = -27.25


 30%|██████████▍                        | 2391/8000 [2:00:25<5:08:46,  3.30s/it]

Episode 2391/8000, real env return = -34.82


 30%|██████████▌                        | 2401/8000 [2:00:58<5:04:08,  3.26s/it]

Episode 2401/8000, real env return = -31.20


 30%|██████████▌                        | 2411/8000 [2:01:31<5:08:02,  3.31s/it]

Episode 2411/8000, real env return = -53.11


 30%|██████████▌                        | 2421/8000 [2:02:03<4:59:02,  3.22s/it]

Episode 2421/8000, real env return = -20.70


 30%|██████████▋                        | 2431/8000 [2:02:33<4:54:58,  3.18s/it]

Episode 2431/8000, real env return = -24.44


 31%|██████████▋                        | 2441/8000 [2:03:05<4:47:13,  3.10s/it]

Episode 2441/8000, real env return = -38.53


 31%|██████████▋                        | 2451/8000 [2:03:36<4:46:01,  3.09s/it]

Episode 2451/8000, real env return = -58.31


 31%|██████████▊                        | 2461/8000 [2:04:09<4:58:34,  3.23s/it]

Episode 2461/8000, real env return = -47.32


 31%|██████████▊                        | 2471/8000 [2:04:41<4:54:25,  3.19s/it]

Episode 2471/8000, real env return = -68.23


 31%|██████████▊                        | 2481/8000 [2:05:13<4:58:09,  3.24s/it]

Episode 2481/8000, real env return = -62.36


 31%|██████████▉                        | 2491/8000 [2:05:45<4:58:43,  3.25s/it]

Episode 2491/8000, real env return = -70.66


 31%|██████████▉                        | 2501/8000 [2:06:17<4:56:36,  3.24s/it]

Episode 2501/8000, real env return = -64.87


 31%|██████████▉                        | 2511/8000 [2:06:49<4:47:45,  3.15s/it]

Episode 2511/8000, real env return = -60.61


 32%|███████████                        | 2521/8000 [2:07:20<4:40:48,  3.08s/it]

Episode 2521/8000, real env return = -65.30


 32%|███████████                        | 2531/8000 [2:07:51<4:44:58,  3.13s/it]

Episode 2531/8000, real env return = -58.81


 32%|███████████                        | 2541/8000 [2:08:24<4:57:22,  3.27s/it]

Episode 2541/8000, real env return = -69.88


 32%|███████████▏                       | 2551/8000 [2:08:56<4:55:13,  3.25s/it]

Episode 2551/8000, real env return = -78.67


 32%|███████████▏                       | 2561/8000 [2:09:28<4:55:08,  3.26s/it]

Episode 2561/8000, real env return = -51.79


 32%|███████████▏                       | 2571/8000 [2:10:00<4:52:19,  3.23s/it]

Episode 2571/8000, real env return = -55.71


 32%|███████████▎                       | 2581/8000 [2:10:33<4:55:14,  3.27s/it]

Episode 2581/8000, real env return = -62.46


 32%|███████████▎                       | 2591/8000 [2:11:03<4:46:58,  3.18s/it]

Episode 2591/8000, real env return = -60.33


 33%|███████████▍                       | 2601/8000 [2:11:33<4:29:57,  3.00s/it]

Episode 2601/8000, real env return = -65.38


 33%|███████████▍                       | 2611/8000 [2:12:06<4:54:34,  3.28s/it]

Episode 2611/8000, real env return = -30.83


 33%|███████████▍                       | 2621/8000 [2:12:38<4:52:10,  3.26s/it]

Episode 2621/8000, real env return = -30.83


 33%|███████████▌                       | 2631/8000 [2:13:11<4:48:20,  3.22s/it]

Episode 2631/8000, real env return = -28.32


 33%|███████████▌                       | 2641/8000 [2:13:43<4:45:11,  3.19s/it]

Episode 2641/8000, real env return = -40.20


 33%|███████████▌                       | 2651/8000 [2:14:16<4:46:42,  3.22s/it]

Episode 2651/8000, real env return = -40.98


 33%|███████████▋                       | 2661/8000 [2:14:48<4:37:29,  3.12s/it]

Episode 2661/8000, real env return = -31.82


 33%|███████████▋                       | 2671/8000 [2:15:20<4:46:21,  3.22s/it]

Episode 2671/8000, real env return = -30.52


 34%|███████████▋                       | 2681/8000 [2:15:52<4:44:40,  3.21s/it]

Episode 2681/8000, real env return = -26.28


 34%|███████████▊                       | 2691/8000 [2:16:25<4:50:35,  3.28s/it]

Episode 2691/8000, real env return = -43.24


 34%|███████████▊                       | 2701/8000 [2:16:57<4:46:02,  3.24s/it]

Episode 2701/8000, real env return = -35.16


 34%|███████████▊                       | 2711/8000 [2:17:30<4:51:24,  3.31s/it]

Episode 2711/8000, real env return = -34.76


 34%|███████████▉                       | 2721/8000 [2:18:01<4:37:46,  3.16s/it]

Episode 2721/8000, real env return = -34.15


 34%|███████████▉                       | 2731/8000 [2:18:33<4:29:10,  3.07s/it]

Episode 2731/8000, real env return = -35.08


 34%|███████████▉                       | 2741/8000 [2:19:06<4:46:27,  3.27s/it]

Episode 2741/8000, real env return = -42.30


 34%|████████████                       | 2751/8000 [2:19:39<4:52:22,  3.34s/it]

Episode 2751/8000, real env return = -38.08


 35%|████████████                       | 2761/8000 [2:20:12<4:51:08,  3.33s/it]

Episode 2761/8000, real env return = -48.32


 35%|████████████                       | 2771/8000 [2:20:46<4:56:39,  3.40s/it]

Episode 2771/8000, real env return = -47.94


 35%|████████████▏                      | 2781/8000 [2:21:20<4:50:53,  3.34s/it]

Episode 2781/8000, real env return = -47.68


 35%|████████████▏                      | 2791/8000 [2:21:52<4:43:57,  3.27s/it]

Episode 2791/8000, real env return = -31.08


 35%|████████████▎                      | 2801/8000 [2:22:25<4:38:13,  3.21s/it]

Episode 2801/8000, real env return = -33.31


 35%|████████████▎                      | 2811/8000 [2:22:56<4:17:18,  2.98s/it]

Episode 2811/8000, real env return = -73.14


 35%|████████████▎                      | 2821/8000 [2:23:28<4:38:52,  3.23s/it]

Episode 2821/8000, real env return = -29.75


 35%|████████████▍                      | 2831/8000 [2:24:01<4:46:10,  3.32s/it]

Episode 2831/8000, real env return = -29.43


 36%|████████████▍                      | 2841/8000 [2:24:33<4:40:17,  3.26s/it]

Episode 2841/8000, real env return = -34.50


 36%|████████████▍                      | 2851/8000 [2:25:06<4:42:30,  3.29s/it]

Episode 2851/8000, real env return = -33.09


 36%|████████████▌                      | 2861/8000 [2:25:38<4:32:46,  3.18s/it]

Episode 2861/8000, real env return = -32.93


 36%|████████████▌                      | 2871/8000 [2:26:10<4:15:02,  2.98s/it]

Episode 2871/8000, real env return = -30.22


 36%|████████████▌                      | 2881/8000 [2:26:42<4:33:35,  3.21s/it]

Episode 2881/8000, real env return = -38.95


 36%|████████████▋                      | 2891/8000 [2:27:14<4:35:34,  3.24s/it]

Episode 2891/8000, real env return = -34.16


 36%|████████████▋                      | 2901/8000 [2:27:46<4:36:09,  3.25s/it]

Episode 2901/8000, real env return = -40.83


 36%|████████████▋                      | 2911/8000 [2:28:19<4:32:42,  3.22s/it]

Episode 2911/8000, real env return = -39.13


 37%|████████████▊                      | 2921/8000 [2:28:50<4:23:15,  3.11s/it]

Episode 2921/8000, real env return = -27.08


 37%|████████████▊                      | 2931/8000 [2:29:22<4:24:10,  3.13s/it]

Episode 2931/8000, real env return = -41.71


 37%|████████████▊                      | 2941/8000 [2:29:54<4:32:19,  3.23s/it]

Episode 2941/8000, real env return = -36.03


 37%|████████████▉                      | 2951/8000 [2:30:26<4:37:14,  3.29s/it]

Episode 2951/8000, real env return = -33.04


 37%|████████████▉                      | 2961/8000 [2:30:58<4:33:21,  3.25s/it]

Episode 2961/8000, real env return = -36.53


 37%|████████████▉                      | 2971/8000 [2:31:31<4:32:54,  3.26s/it]

Episode 2971/8000, real env return = -35.11


 37%|█████████████                      | 2981/8000 [2:32:03<4:31:58,  3.25s/it]

Episode 2981/8000, real env return = -35.77


 37%|█████████████                      | 2991/8000 [2:32:36<4:27:35,  3.21s/it]

Episode 2991/8000, real env return = -27.59


 38%|█████████████▏                     | 3001/8000 [2:33:08<4:30:08,  3.24s/it]

Episode 3001/8000, real env return = -34.19


 38%|█████████████▏                     | 3011/8000 [2:33:40<4:27:27,  3.22s/it]

Episode 3011/8000, real env return = -29.16


 38%|█████████████▏                     | 3021/8000 [2:34:12<4:29:51,  3.25s/it]

Episode 3021/8000, real env return = -48.34


 38%|█████████████▎                     | 3031/8000 [2:34:44<4:26:41,  3.22s/it]

Episode 3031/8000, real env return = -40.75


 38%|█████████████▎                     | 3041/8000 [2:35:17<4:32:22,  3.30s/it]

Episode 3041/8000, real env return = -32.07


 38%|█████████████▎                     | 3051/8000 [2:35:48<4:27:00,  3.24s/it]

Episode 3051/8000, real env return = -29.82


 38%|█████████████▍                     | 3061/8000 [2:36:21<4:19:47,  3.16s/it]

Episode 3061/8000, real env return = -34.31


 38%|█████████████▍                     | 3071/8000 [2:36:53<4:23:53,  3.21s/it]

Episode 3071/8000, real env return = -28.59


 39%|█████████████▍                     | 3081/8000 [2:37:23<4:21:02,  3.18s/it]

Episode 3081/8000, real env return = -32.89


 39%|█████████████▌                     | 3091/8000 [2:37:56<4:30:30,  3.31s/it]

Episode 3091/8000, real env return = -38.67


 39%|█████████████▌                     | 3101/8000 [2:38:29<4:31:05,  3.32s/it]

Episode 3101/8000, real env return = -30.00


 39%|█████████████▌                     | 3111/8000 [2:39:01<4:21:43,  3.21s/it]

Episode 3111/8000, real env return = -38.04


 39%|█████████████▋                     | 3121/8000 [2:39:34<4:26:44,  3.28s/it]

Episode 3121/8000, real env return = -29.26


 39%|█████████████▋                     | 3131/8000 [2:40:06<4:25:40,  3.27s/it]

Episode 3131/8000, real env return = -26.96


 39%|█████████████▋                     | 3141/8000 [2:40:38<4:04:09,  3.01s/it]

Episode 3141/8000, real env return = -26.19


 39%|█████████████▊                     | 3151/8000 [2:41:11<4:21:34,  3.24s/it]

Episode 3151/8000, real env return = -28.79


 40%|█████████████▊                     | 3161/8000 [2:41:43<4:25:16,  3.29s/it]

Episode 3161/8000, real env return = -21.38


 40%|█████████████▊                     | 3171/8000 [2:42:16<4:24:07,  3.28s/it]

Episode 3171/8000, real env return = -25.55


 40%|█████████████▉                     | 3181/8000 [2:42:49<4:20:22,  3.24s/it]

Episode 3181/8000, real env return = -31.14


 40%|█████████████▉                     | 3191/8000 [2:43:21<4:22:59,  3.28s/it]

Episode 3191/8000, real env return = -29.59


 40%|██████████████                     | 3201/8000 [2:43:53<4:15:01,  3.19s/it]

Episode 3201/8000, real env return = -34.08


 40%|██████████████                     | 3211/8000 [2:44:25<4:18:28,  3.24s/it]

Episode 3211/8000, real env return = -28.72


 40%|██████████████                     | 3221/8000 [2:44:56<4:12:06,  3.17s/it]

Episode 3221/8000, real env return = -28.25


 40%|██████████████▏                    | 3231/8000 [2:45:29<4:21:05,  3.28s/it]

Episode 3231/8000, real env return = -28.53


 41%|██████████████▏                    | 3241/8000 [2:46:02<4:22:40,  3.31s/it]

Episode 3241/8000, real env return = -31.07


 41%|██████████████▏                    | 3251/8000 [2:46:34<4:07:07,  3.12s/it]

Episode 3251/8000, real env return = -30.28


 41%|██████████████▎                    | 3261/8000 [2:47:06<4:16:42,  3.25s/it]

Episode 3261/8000, real env return = -27.72


 41%|██████████████▎                    | 3271/8000 [2:47:39<4:21:49,  3.32s/it]

Episode 3271/8000, real env return = -35.73


 41%|██████████████▎                    | 3281/8000 [2:48:10<4:13:34,  3.22s/it]

Episode 3281/8000, real env return = -23.09


 41%|██████████████▍                    | 3291/8000 [2:48:43<4:06:56,  3.15s/it]

Episode 3291/8000, real env return = -30.48


 41%|██████████████▍                    | 3301/8000 [2:49:16<4:20:36,  3.33s/it]

Episode 3301/8000, real env return = -31.26


 41%|██████████████▍                    | 3311/8000 [2:49:48<4:14:31,  3.26s/it]

Episode 3311/8000, real env return = -25.22


 42%|██████████████▌                    | 3321/8000 [2:50:20<4:10:11,  3.21s/it]

Episode 3321/8000, real env return = -23.63


 42%|██████████████▌                    | 3331/8000 [2:50:53<4:17:25,  3.31s/it]

Episode 3331/8000, real env return = -28.21


 42%|██████████████▌                    | 3341/8000 [2:51:25<4:09:48,  3.22s/it]

Episode 3341/8000, real env return = -34.22


 42%|██████████████▋                    | 3351/8000 [2:51:58<4:14:55,  3.29s/it]

Episode 3351/8000, real env return = -27.74


 42%|██████████████▋                    | 3361/8000 [2:52:31<4:15:31,  3.30s/it]

Episode 3361/8000, real env return = -31.09


 42%|██████████████▋                    | 3371/8000 [2:53:04<4:13:22,  3.28s/it]

Episode 3371/8000, real env return = -44.68


 42%|██████████████▊                    | 3381/8000 [2:53:36<4:11:18,  3.26s/it]

Episode 3381/8000, real env return = -30.93


 42%|██████████████▊                    | 3391/8000 [2:54:08<4:08:26,  3.23s/it]

Episode 3391/8000, real env return = -32.97


 43%|██████████████▉                    | 3401/8000 [2:54:41<4:09:49,  3.26s/it]

Episode 3401/8000, real env return = -27.29


 43%|██████████████▉                    | 3411/8000 [2:55:13<4:10:36,  3.28s/it]

Episode 3411/8000, real env return = -31.33


 43%|██████████████▉                    | 3421/8000 [2:55:45<4:07:29,  3.24s/it]

Episode 3421/8000, real env return = -33.78


 43%|███████████████                    | 3431/8000 [2:56:17<4:08:30,  3.26s/it]

Episode 3431/8000, real env return = -27.51


 43%|███████████████                    | 3441/8000 [2:56:49<4:06:41,  3.25s/it]

Episode 3441/8000, real env return = -30.01


 43%|███████████████                    | 3451/8000 [2:57:21<4:03:06,  3.21s/it]

Episode 3451/8000, real env return = -24.68


 43%|███████████████▏                   | 3461/8000 [2:57:55<4:17:42,  3.41s/it]

Episode 3461/8000, real env return = -26.83


 43%|███████████████▏                   | 3471/8000 [2:58:29<4:16:37,  3.40s/it]

Episode 3471/8000, real env return = -27.37


 44%|███████████████▏                   | 3481/8000 [2:59:03<4:20:23,  3.46s/it]

Episode 3481/8000, real env return = -28.20


 44%|███████████████▎                   | 3491/8000 [2:59:38<4:19:11,  3.45s/it]

Episode 3491/8000, real env return = -28.54


 44%|███████████████▎                   | 3501/8000 [3:00:12<4:18:32,  3.45s/it]

Episode 3501/8000, real env return = -29.60


 44%|███████████████▎                   | 3511/8000 [3:00:45<4:00:45,  3.22s/it]

Episode 3511/8000, real env return = -30.63


 44%|███████████████▍                   | 3521/8000 [3:01:19<4:16:07,  3.43s/it]

Episode 3521/8000, real env return = -32.85


 44%|███████████████▍                   | 3531/8000 [3:01:54<4:10:23,  3.36s/it]

Episode 3531/8000, real env return = -39.86


 44%|███████████████▍                   | 3541/8000 [3:02:27<4:09:00,  3.35s/it]

Episode 3541/8000, real env return = -32.32


 44%|███████████████▌                   | 3551/8000 [3:03:00<4:04:48,  3.30s/it]

Episode 3551/8000, real env return = -38.82


 45%|███████████████▌                   | 3561/8000 [3:03:33<4:07:22,  3.34s/it]

Episode 3561/8000, real env return = -36.99


 45%|███████████████▌                   | 3571/8000 [3:04:06<4:06:59,  3.35s/it]

Episode 3571/8000, real env return = -47.20


 45%|███████████████▋                   | 3581/8000 [3:04:39<4:05:33,  3.33s/it]

Episode 3581/8000, real env return = -32.57


 45%|███████████████▋                   | 3591/8000 [3:05:12<3:51:59,  3.16s/it]

Episode 3591/8000, real env return = -22.84


 45%|███████████████▊                   | 3601/8000 [3:05:45<3:57:38,  3.24s/it]

Episode 3601/8000, real env return = -31.36


 45%|███████████████▊                   | 3611/8000 [3:06:18<4:01:55,  3.31s/it]

Episode 3611/8000, real env return = -39.50


 45%|███████████████▊                   | 3621/8000 [3:06:50<4:02:20,  3.32s/it]

Episode 3621/8000, real env return = -25.31


 45%|███████████████▉                   | 3631/8000 [3:07:24<4:01:02,  3.31s/it]

Episode 3631/8000, real env return = -33.44


 46%|███████████████▉                   | 3641/8000 [3:07:56<4:02:00,  3.33s/it]

Episode 3641/8000, real env return = -30.10


 46%|███████████████▉                   | 3651/8000 [3:08:29<3:56:05,  3.26s/it]

Episode 3651/8000, real env return = -27.49


 46%|████████████████                   | 3661/8000 [3:09:01<4:05:30,  3.39s/it]

Episode 3661/8000, real env return = -23.59


 46%|████████████████                   | 3671/8000 [3:09:36<4:11:22,  3.48s/it]

Episode 3671/8000, real env return = -24.17


 46%|████████████████                   | 3681/8000 [3:10:10<4:10:01,  3.47s/it]

Episode 3681/8000, real env return = -23.90


 46%|████████████████▏                  | 3691/8000 [3:10:43<4:09:55,  3.48s/it]

Episode 3691/8000, real env return = -25.81


 46%|████████████████▏                  | 3701/8000 [3:11:19<4:12:17,  3.52s/it]

Episode 3701/8000, real env return = -24.39


 46%|████████████████▏                  | 3711/8000 [3:11:53<4:04:56,  3.43s/it]

Episode 3711/8000, real env return = -28.00


 47%|████████████████▎                  | 3721/8000 [3:12:28<4:15:48,  3.59s/it]

Episode 3721/8000, real env return = -36.99


 47%|████████████████▎                  | 3731/8000 [3:13:03<4:07:23,  3.48s/it]

Episode 3731/8000, real env return = -22.36


 47%|████████████████▎                  | 3741/8000 [3:13:37<4:07:13,  3.48s/it]

Episode 3741/8000, real env return = -22.05


 47%|████████████████▍                  | 3751/8000 [3:14:13<4:08:32,  3.51s/it]

Episode 3751/8000, real env return = -21.63


 47%|████████████████▍                  | 3761/8000 [3:14:47<4:02:55,  3.44s/it]

Episode 3761/8000, real env return = -28.66


 47%|████████████████▍                  | 3771/8000 [3:15:21<3:57:37,  3.37s/it]

Episode 3771/8000, real env return = -25.30


 47%|████████████████▌                  | 3781/8000 [3:15:57<4:03:19,  3.46s/it]

Episode 3781/8000, real env return = -25.00


 47%|████████████████▌                  | 3791/8000 [3:16:31<4:04:15,  3.48s/it]

Episode 3791/8000, real env return = -24.18


 48%|████████████████▋                  | 3801/8000 [3:17:06<4:07:19,  3.53s/it]

Episode 3801/8000, real env return = -31.47


 48%|████████████████▋                  | 3811/8000 [3:17:41<4:03:14,  3.48s/it]

Episode 3811/8000, real env return = -20.05


 48%|████████████████▋                  | 3821/8000 [3:18:15<4:04:13,  3.51s/it]

Episode 3821/8000, real env return = -25.46


 48%|████████████████▊                  | 3831/8000 [3:18:50<3:59:25,  3.45s/it]

Episode 3831/8000, real env return = -23.82


 48%|████████████████▊                  | 3841/8000 [3:19:25<4:02:22,  3.50s/it]

Episode 3841/8000, real env return = -26.02


 48%|████████████████▊                  | 3851/8000 [3:20:00<4:03:23,  3.52s/it]

Episode 3851/8000, real env return = -26.83


 48%|████████████████▉                  | 3861/8000 [3:20:35<3:58:22,  3.46s/it]

Episode 3861/8000, real env return = -30.80


 48%|████████████████▉                  | 3871/8000 [3:21:09<3:53:57,  3.40s/it]

Episode 3871/8000, real env return = -27.51


 49%|████████████████▉                  | 3881/8000 [3:21:44<3:56:50,  3.45s/it]

Episode 3881/8000, real env return = -20.46


 49%|█████████████████                  | 3891/8000 [3:22:19<3:59:26,  3.50s/it]

Episode 3891/8000, real env return = -20.12


 49%|█████████████████                  | 3901/8000 [3:22:53<4:01:36,  3.54s/it]

Episode 3901/8000, real env return = -27.20


 49%|█████████████████                  | 3911/8000 [3:23:28<4:05:45,  3.61s/it]

Episode 3911/8000, real env return = -23.55


 49%|█████████████████▏                 | 3921/8000 [3:24:02<3:52:19,  3.42s/it]

Episode 3921/8000, real env return = -22.08


 49%|█████████████████▏                 | 3931/8000 [3:24:36<4:00:04,  3.54s/it]

Episode 3931/8000, real env return = -22.12


 49%|█████████████████▏                 | 3941/8000 [3:25:10<3:52:17,  3.43s/it]

Episode 3941/8000, real env return = -26.10


 49%|█████████████████▎                 | 3951/8000 [3:25:45<3:56:53,  3.51s/it]

Episode 3951/8000, real env return = -20.16


 50%|█████████████████▎                 | 3961/8000 [3:26:19<3:46:12,  3.36s/it]

Episode 3961/8000, real env return = -21.84


 50%|█████████████████▎                 | 3971/8000 [3:26:52<3:34:53,  3.20s/it]

Episode 3971/8000, real env return = -22.41


 50%|█████████████████▍                 | 3981/8000 [3:27:26<3:53:18,  3.48s/it]

Episode 3981/8000, real env return = -24.22


 50%|█████████████████▍                 | 3991/8000 [3:28:01<3:50:23,  3.45s/it]

Episode 3991/8000, real env return = -21.53


 50%|█████████████████▌                 | 4001/8000 [3:28:36<4:01:03,  3.62s/it]

Episode 4001/8000, real env return = -20.56


 50%|█████████████████▌                 | 4011/8000 [3:29:10<3:52:10,  3.49s/it]

Episode 4011/8000, real env return = -23.42


 50%|█████████████████▌                 | 4021/8000 [3:29:44<3:51:02,  3.48s/it]

Episode 4021/8000, real env return = -27.66


 50%|█████████████████▋                 | 4031/8000 [3:30:18<3:45:20,  3.41s/it]

Episode 4031/8000, real env return = -23.72


 51%|█████████████████▋                 | 4041/8000 [3:30:53<3:51:23,  3.51s/it]

Episode 4041/8000, real env return = -17.79


 51%|█████████████████▋                 | 4051/8000 [3:31:26<3:47:49,  3.46s/it]

Episode 4051/8000, real env return = -24.57


 51%|█████████████████▊                 | 4061/8000 [3:32:00<3:36:52,  3.30s/it]

Episode 4061/8000, real env return = -25.81


 51%|█████████████████▊                 | 4071/8000 [3:32:35<3:47:33,  3.48s/it]

Episode 4071/8000, real env return = -22.77


 51%|█████████████████▊                 | 4081/8000 [3:33:09<3:46:54,  3.47s/it]

Episode 4081/8000, real env return = -17.12


 51%|█████████████████▉                 | 4091/8000 [3:33:43<3:36:25,  3.32s/it]

Episode 4091/8000, real env return = -11.54


 51%|█████████████████▉                 | 4101/8000 [3:34:18<3:48:36,  3.52s/it]

Episode 4101/8000, real env return = -14.82


 51%|█████████████████▉                 | 4111/8000 [3:34:52<3:46:42,  3.50s/it]

Episode 4111/8000, real env return = -7.72


 52%|██████████████████                 | 4121/8000 [3:35:27<3:40:54,  3.42s/it]

Episode 4121/8000, real env return = 2.30


 52%|██████████████████                 | 4131/8000 [3:36:02<3:42:33,  3.45s/it]

Episode 4131/8000, real env return = 3.26


 52%|██████████████████                 | 4141/8000 [3:36:36<3:42:33,  3.46s/it]

Episode 4141/8000, real env return = -6.16


 52%|██████████████████▏                | 4151/8000 [3:37:11<3:42:41,  3.47s/it]

Episode 4151/8000, real env return = -0.02


 52%|██████████████████▏                | 4161/8000 [3:37:45<3:41:33,  3.46s/it]

Episode 4161/8000, real env return = 1.35


 52%|██████████████████▏                | 4171/8000 [3:38:20<3:38:24,  3.42s/it]

Episode 4171/8000, real env return = 0.99


 52%|██████████████████▎                | 4181/8000 [3:38:54<3:40:13,  3.46s/it]

Episode 4181/8000, real env return = 3.25


 52%|██████████████████▎                | 4191/8000 [3:39:28<3:41:41,  3.49s/it]

Episode 4191/8000, real env return = 5.16


 53%|██████████████████▍                | 4201/8000 [3:40:01<3:33:37,  3.37s/it]

Episode 4201/8000, real env return = 4.24


 53%|██████████████████▍                | 4211/8000 [3:40:36<3:41:21,  3.51s/it]

Episode 4211/8000, real env return = -1.46


 53%|██████████████████▍                | 4221/8000 [3:41:10<3:38:07,  3.46s/it]

Episode 4221/8000, real env return = 5.54


 53%|██████████████████▌                | 4231/8000 [3:41:44<3:36:20,  3.44s/it]

Episode 4231/8000, real env return = 5.30


 53%|██████████████████▌                | 4241/8000 [3:42:18<3:33:23,  3.41s/it]

Episode 4241/8000, real env return = 11.81


 53%|██████████████████▌                | 4251/8000 [3:42:52<3:31:52,  3.39s/it]

Episode 4251/8000, real env return = 3.46


 53%|██████████████████▋                | 4261/8000 [3:43:27<3:35:43,  3.46s/it]

Episode 4261/8000, real env return = 2.84


 53%|██████████████████▋                | 4271/8000 [3:44:01<3:37:51,  3.51s/it]

Episode 4271/8000, real env return = -0.70


 54%|██████████████████▋                | 4281/8000 [3:44:35<3:29:10,  3.37s/it]

Episode 4281/8000, real env return = 11.14


 54%|██████████████████▊                | 4291/8000 [3:45:09<3:34:48,  3.48s/it]

Episode 4291/8000, real env return = 16.31


 54%|██████████████████▊                | 4301/8000 [3:45:43<3:31:39,  3.43s/it]

Episode 4301/8000, real env return = 16.74


 54%|██████████████████▊                | 4311/8000 [3:46:18<3:32:49,  3.46s/it]

Episode 4311/8000, real env return = 7.83


 54%|██████████████████▉                | 4321/8000 [3:46:52<3:25:43,  3.36s/it]

Episode 4321/8000, real env return = 9.28


 54%|██████████████████▉                | 4331/8000 [3:47:27<3:34:44,  3.51s/it]

Episode 4331/8000, real env return = 37.95


 54%|██████████████████▉                | 4341/8000 [3:48:01<3:27:14,  3.40s/it]

Episode 4341/8000, real env return = 54.26


 54%|███████████████████                | 4351/8000 [3:48:34<3:18:00,  3.26s/it]

Episode 4351/8000, real env return = 77.61


 55%|███████████████████                | 4361/8000 [3:49:09<3:36:37,  3.57s/it]

Episode 4361/8000, real env return = 90.66


 55%|███████████████████                | 4371/8000 [3:49:45<3:37:36,  3.60s/it]

Episode 4371/8000, real env return = 91.80


 55%|███████████████████▏               | 4381/8000 [3:50:21<3:36:03,  3.58s/it]

Episode 4381/8000, real env return = 116.79


 55%|███████████████████▏               | 4391/8000 [3:50:57<3:38:01,  3.62s/it]

Episode 4391/8000, real env return = 114.30


 55%|███████████████████▎               | 4401/8000 [3:51:32<3:33:23,  3.56s/it]

Episode 4401/8000, real env return = 106.48


 55%|███████████████████▎               | 4411/8000 [3:52:07<3:24:55,  3.43s/it]

Episode 4411/8000, real env return = 132.33


 55%|███████████████████▎               | 4421/8000 [3:52:42<3:28:22,  3.49s/it]

Episode 4421/8000, real env return = 67.06


 55%|███████████████████▍               | 4431/8000 [3:53:17<3:18:58,  3.35s/it]

Episode 4431/8000, real env return = 152.46


 56%|███████████████████▍               | 4441/8000 [3:53:51<3:23:12,  3.43s/it]

Episode 4441/8000, real env return = 272.61


 56%|███████████████████▍               | 4451/8000 [3:54:25<3:18:55,  3.36s/it]

Episode 4451/8000, real env return = 277.75


 56%|███████████████████▌               | 4461/8000 [3:54:58<3:11:18,  3.24s/it]

Episode 4461/8000, real env return = -4.18


 56%|███████████████████▌               | 4471/8000 [3:55:28<2:49:07,  2.88s/it]

Episode 4471/8000, real env return = 7.29


 56%|███████████████████▌               | 4481/8000 [3:56:01<3:13:22,  3.30s/it]

Episode 4481/8000, real env return = 176.41


 56%|███████████████████▋               | 4491/8000 [3:56:32<3:05:34,  3.17s/it]

Episode 4491/8000, real env return = 284.30


 56%|███████████████████▋               | 4501/8000 [3:57:04<3:02:05,  3.12s/it]

Episode 4501/8000, real env return = -39.41


 56%|███████████████████▋               | 4511/8000 [3:57:37<3:08:03,  3.23s/it]

Episode 4511/8000, real env return = 274.79


 57%|███████████████████▊               | 4521/8000 [3:58:11<3:09:18,  3.26s/it]

Episode 4521/8000, real env return = 2.29


 57%|███████████████████▊               | 4531/8000 [3:58:44<3:12:15,  3.33s/it]

Episode 4531/8000, real env return = 284.70


 57%|███████████████████▊               | 4541/8000 [3:59:16<2:59:22,  3.11s/it]

Episode 4541/8000, real env return = 287.66


 57%|███████████████████▉               | 4551/8000 [3:59:47<2:49:09,  2.94s/it]

Episode 4551/8000, real env return = 287.28


 57%|███████████████████▉               | 4561/8000 [4:00:21<3:07:37,  3.27s/it]

Episode 4561/8000, real env return = 290.26


 57%|███████████████████▉               | 4571/8000 [4:00:53<3:04:21,  3.23s/it]

Episode 4571/8000, real env return = 287.47


 57%|████████████████████               | 4581/8000 [4:01:25<3:02:45,  3.21s/it]

Episode 4581/8000, real env return = 44.71


 57%|████████████████████               | 4591/8000 [4:01:56<2:56:55,  3.11s/it]

Episode 4591/8000, real env return = 293.07


 58%|████████████████████▏              | 4601/8000 [4:02:27<2:54:04,  3.07s/it]

Episode 4601/8000, real env return = 288.23


 58%|████████████████████▏              | 4611/8000 [4:02:59<3:00:58,  3.20s/it]

Episode 4611/8000, real env return = 292.64


 58%|████████████████████▏              | 4621/8000 [4:03:30<2:50:07,  3.02s/it]

Episode 4621/8000, real env return = 293.48


 58%|████████████████████▎              | 4631/8000 [4:04:01<2:53:56,  3.10s/it]

Episode 4631/8000, real env return = 291.01


 58%|████████████████████▎              | 4641/8000 [4:04:32<2:55:42,  3.14s/it]

Episode 4641/8000, real env return = 293.18


 58%|████████████████████▎              | 4651/8000 [4:05:03<2:54:06,  3.12s/it]

Episode 4651/8000, real env return = 291.70


 58%|████████████████████▍              | 4661/8000 [4:05:35<2:48:13,  3.02s/it]

Episode 4661/8000, real env return = 291.86


 58%|████████████████████▍              | 4671/8000 [4:06:06<2:51:08,  3.08s/it]

Episode 4671/8000, real env return = 289.05


 59%|████████████████████▍              | 4681/8000 [4:07:13<6:46:13,  7.34s/it]

Episode 4681/8000, real env return = 287.05


 59%|████████████████████▌              | 4691/8000 [4:08:29<6:58:57,  7.60s/it]

Episode 4691/8000, real env return = 289.09


 59%|████████████████████▌              | 4701/8000 [4:09:44<6:51:25,  7.48s/it]

Episode 4701/8000, real env return = 292.47


 59%|████████████████████▌              | 4711/8000 [4:10:59<6:34:01,  7.19s/it]

Episode 4711/8000, real env return = 127.46


 59%|████████████████████▋              | 4721/8000 [4:12:12<6:47:34,  7.46s/it]

Episode 4721/8000, real env return = 293.87


 59%|████████████████████▋              | 4731/8000 [4:13:29<7:03:31,  7.77s/it]

Episode 4731/8000, real env return = 291.57


 59%|████████████████████▋              | 4741/8000 [4:14:45<6:40:41,  7.38s/it]

Episode 4741/8000, real env return = 290.64


 59%|████████████████████▊              | 4751/8000 [4:15:59<6:51:39,  7.60s/it]

Episode 4751/8000, real env return = 288.34


 60%|████████████████████▊              | 4761/8000 [4:17:14<6:42:11,  7.45s/it]

Episode 4761/8000, real env return = 288.41


 60%|████████████████████▊              | 4771/8000 [4:18:30<6:47:35,  7.57s/it]

Episode 4771/8000, real env return = 291.47


 60%|████████████████████▉              | 4781/8000 [4:19:42<6:32:53,  7.32s/it]

Episode 4781/8000, real env return = 291.23


 60%|████████████████████▉              | 4791/8000 [4:20:56<6:33:10,  7.35s/it]

Episode 4791/8000, real env return = 286.81


 60%|█████████████████████              | 4801/8000 [4:22:11<6:44:27,  7.59s/it]

Episode 4801/8000, real env return = 288.29


 60%|█████████████████████              | 4811/8000 [4:23:23<6:22:12,  7.19s/it]

Episode 4811/8000, real env return = 286.86


 60%|█████████████████████              | 4821/8000 [4:24:37<6:35:00,  7.46s/it]

Episode 4821/8000, real env return = 284.90


 60%|█████████████████████▏             | 4831/8000 [4:25:49<6:31:55,  7.42s/it]

Episode 4831/8000, real env return = 288.21


 61%|█████████████████████▏             | 4841/8000 [4:27:03<6:28:00,  7.37s/it]

Episode 4841/8000, real env return = 279.50


 61%|█████████████████████▏             | 4851/8000 [4:28:15<5:59:29,  6.85s/it]

Episode 4851/8000, real env return = 286.42


 61%|█████████████████████▎             | 4861/8000 [4:29:29<6:09:01,  7.05s/it]

Episode 4861/8000, real env return = 293.22


 61%|█████████████████████▎             | 4871/8000 [4:30:40<6:16:26,  7.22s/it]

Episode 4871/8000, real env return = 290.51


 61%|█████████████████████▎             | 4881/8000 [4:31:54<6:24:06,  7.39s/it]

Episode 4881/8000, real env return = 287.04


 61%|█████████████████████▍             | 4891/8000 [4:33:08<6:11:34,  7.17s/it]

Episode 4891/8000, real env return = 286.95


 61%|█████████████████████▍             | 4901/8000 [4:34:24<6:44:41,  7.84s/it]

Episode 4901/8000, real env return = 288.21


 61%|█████████████████████▍             | 4911/8000 [4:35:40<6:28:11,  7.54s/it]

Episode 4911/8000, real env return = 291.73


 62%|█████████████████████▌             | 4921/8000 [4:36:54<6:14:26,  7.30s/it]

Episode 4921/8000, real env return = 290.31


 62%|█████████████████████▌             | 4931/8000 [4:38:07<6:17:30,  7.38s/it]

Episode 4931/8000, real env return = 289.05


 62%|█████████████████████▌             | 4941/8000 [4:39:20<6:04:41,  7.15s/it]

Episode 4941/8000, real env return = 286.39


 62%|█████████████████████▋             | 4951/8000 [4:40:33<6:18:55,  7.46s/it]

Episode 4951/8000, real env return = 287.33


 62%|█████████████████████▋             | 4961/8000 [4:41:43<6:03:22,  7.17s/it]

Episode 4961/8000, real env return = -120.13


 62%|█████████████████████▋             | 4971/8000 [4:42:52<5:50:23,  6.94s/it]

Episode 4971/8000, real env return = 282.17


 62%|█████████████████████▊             | 4981/8000 [4:43:57<5:40:19,  6.76s/it]

Episode 4981/8000, real env return = -126.41


 62%|█████████████████████▊             | 4991/8000 [4:45:06<5:41:23,  6.81s/it]

Episode 4991/8000, real env return = -115.65


 63%|█████████████████████▉             | 5001/8000 [4:45:46<2:43:32,  3.27s/it]

Episode 5001/8000, real env return = 290.30


 63%|█████████████████████▉             | 5011/8000 [4:46:19<2:39:13,  3.20s/it]

Episode 5011/8000, real env return = 287.13


 63%|█████████████████████▉             | 5021/8000 [4:46:48<2:24:59,  2.92s/it]

Episode 5021/8000, real env return = 287.11


 63%|██████████████████████             | 5031/8000 [4:47:18<2:28:23,  3.00s/it]

Episode 5031/8000, real env return = 287.29


 63%|██████████████████████             | 5041/8000 [4:47:49<2:40:56,  3.26s/it]

Episode 5041/8000, real env return = 287.68


 63%|██████████████████████             | 5051/8000 [4:48:20<2:30:37,  3.06s/it]

Episode 5051/8000, real env return = 286.53


 63%|██████████████████████▏            | 5061/8000 [4:48:52<2:39:04,  3.25s/it]

Episode 5061/8000, real env return = 286.78


 63%|██████████████████████▏            | 5071/8000 [4:49:22<2:25:43,  2.99s/it]

Episode 5071/8000, real env return = 288.65


 64%|██████████████████████▏            | 5081/8000 [4:49:51<2:20:20,  2.88s/it]

Episode 5081/8000, real env return = 285.75


 64%|██████████████████████▎            | 5091/8000 [4:50:19<2:13:51,  2.76s/it]

Episode 5091/8000, real env return = 287.66


 64%|██████████████████████▎            | 5101/8000 [4:50:50<2:23:46,  2.98s/it]

Episode 5101/8000, real env return = 285.13


 64%|██████████████████████▎            | 5111/8000 [4:51:19<2:25:22,  3.02s/it]

Episode 5111/8000, real env return = 284.48


 64%|██████████████████████▍            | 5121/8000 [4:51:48<2:21:35,  2.95s/it]

Episode 5121/8000, real env return = 293.22


 64%|██████████████████████▍            | 5131/8000 [4:52:18<2:21:54,  2.97s/it]

Episode 5131/8000, real env return = 285.16


 64%|██████████████████████▍            | 5141/8000 [4:52:47<2:21:06,  2.96s/it]

Episode 5141/8000, real env return = 289.38


 64%|██████████████████████▌            | 5151/8000 [4:53:15<2:13:27,  2.81s/it]

Episode 5151/8000, real env return = 292.57


 65%|██████████████████████▌            | 5161/8000 [4:53:44<2:11:57,  2.79s/it]

Episode 5161/8000, real env return = 291.16


 65%|██████████████████████▌            | 5171/8000 [4:54:13<2:21:25,  3.00s/it]

Episode 5171/8000, real env return = 290.95


 65%|██████████████████████▋            | 5181/8000 [4:54:45<2:27:58,  3.15s/it]

Episode 5181/8000, real env return = 289.90


 65%|██████████████████████▋            | 5191/8000 [4:55:16<2:25:55,  3.12s/it]

Episode 5191/8000, real env return = 285.98


 65%|██████████████████████▊            | 5201/8000 [4:55:46<2:23:42,  3.08s/it]

Episode 5201/8000, real env return = 290.41


 65%|██████████████████████▊            | 5211/8000 [4:56:15<2:20:33,  3.02s/it]

Episode 5211/8000, real env return = 289.39


 65%|██████████████████████▊            | 5221/8000 [4:56:46<2:12:41,  2.86s/it]

Episode 5221/8000, real env return = 291.75


 65%|██████████████████████▉            | 5231/8000 [4:57:16<2:24:19,  3.13s/it]

Episode 5231/8000, real env return = 286.37


 66%|██████████████████████▉            | 5241/8000 [4:57:47<2:15:44,  2.95s/it]

Episode 5241/8000, real env return = 289.36


 66%|██████████████████████▉            | 5251/8000 [4:58:19<2:23:38,  3.14s/it]

Episode 5251/8000, real env return = 288.62


 66%|███████████████████████            | 5261/8000 [4:58:51<2:25:44,  3.19s/it]

Episode 5261/8000, real env return = 283.64


 66%|███████████████████████            | 5271/8000 [4:59:20<2:14:08,  2.95s/it]

Episode 5271/8000, real env return = 290.73


 66%|███████████████████████            | 5281/8000 [4:59:52<2:16:16,  3.01s/it]

Episode 5281/8000, real env return = 287.24


 66%|███████████████████████▏           | 5291/8000 [5:00:23<2:21:23,  3.13s/it]

Episode 5291/8000, real env return = 290.08


 66%|███████████████████████▏           | 5301/8000 [5:00:54<2:21:54,  3.15s/it]

Episode 5301/8000, real env return = 287.19


 66%|███████████████████████▏           | 5311/8000 [5:01:25<2:19:33,  3.11s/it]

Episode 5311/8000, real env return = 290.59


 67%|███████████████████████▎           | 5321/8000 [5:01:55<2:13:50,  3.00s/it]

Episode 5321/8000, real env return = 290.99


 67%|███████████████████████▎           | 5331/8000 [5:02:23<2:07:52,  2.87s/it]

Episode 5331/8000, real env return = 293.68


 67%|███████████████████████▎           | 5341/8000 [5:02:53<2:13:38,  3.02s/it]

Episode 5341/8000, real env return = 290.09


 67%|███████████████████████▍           | 5351/8000 [5:03:22<2:07:09,  2.88s/it]

Episode 5351/8000, real env return = 284.48


 67%|███████████████████████▍           | 5361/8000 [5:03:51<2:15:50,  3.09s/it]

Episode 5361/8000, real env return = 292.73


 67%|███████████████████████▍           | 5371/8000 [5:04:21<2:07:37,  2.91s/it]

Episode 5371/8000, real env return = 294.34


 67%|███████████████████████▌           | 5381/8000 [5:04:50<2:05:27,  2.87s/it]

Episode 5381/8000, real env return = 289.28


 67%|███████████████████████▌           | 5391/8000 [5:05:20<2:06:59,  2.92s/it]

Episode 5391/8000, real env return = 290.27


 68%|███████████████████████▋           | 5401/8000 [5:05:49<2:04:54,  2.88s/it]

Episode 5401/8000, real env return = 290.51


 68%|███████████████████████▋           | 5411/8000 [5:06:18<2:10:04,  3.01s/it]

Episode 5411/8000, real env return = 292.34


 68%|███████████████████████▋           | 5421/8000 [5:06:49<2:12:29,  3.08s/it]

Episode 5421/8000, real env return = 291.88


 68%|███████████████████████▊           | 5431/8000 [5:07:17<2:06:05,  2.94s/it]

Episode 5431/8000, real env return = 288.39


 68%|███████████████████████▊           | 5441/8000 [5:07:48<2:14:19,  3.15s/it]

Episode 5441/8000, real env return = 289.44


 68%|███████████████████████▊           | 5451/8000 [5:08:17<1:55:35,  2.72s/it]

Episode 5451/8000, real env return = 290.72


 68%|███████████████████████▉           | 5461/8000 [5:08:46<2:00:00,  2.84s/it]

Episode 5461/8000, real env return = 292.15


 68%|███████████████████████▉           | 5471/8000 [5:09:17<2:07:54,  3.03s/it]

Episode 5471/8000, real env return = 289.07


 69%|███████████████████████▉           | 5481/8000 [5:09:46<1:58:58,  2.83s/it]

Episode 5481/8000, real env return = 297.25


 69%|████████████████████████           | 5491/8000 [5:10:15<1:58:55,  2.84s/it]

Episode 5491/8000, real env return = 292.28


 69%|████████████████████████           | 5501/8000 [5:10:44<2:02:07,  2.93s/it]

Episode 5501/8000, real env return = 293.33


 69%|████████████████████████           | 5511/8000 [5:11:14<2:04:29,  3.00s/it]

Episode 5511/8000, real env return = 291.01


 69%|████████████████████████▏          | 5521/8000 [5:11:41<1:55:46,  2.80s/it]

Episode 5521/8000, real env return = 293.93


 69%|████████████████████████▏          | 5531/8000 [5:12:10<2:00:35,  2.93s/it]

Episode 5531/8000, real env return = 291.76


 69%|████████████████████████▏          | 5541/8000 [5:12:39<2:04:49,  3.05s/it]

Episode 5541/8000, real env return = 290.89


 69%|████████████████████████▎          | 5551/8000 [5:13:07<1:50:08,  2.70s/it]

Episode 5551/8000, real env return = 292.44


 70%|████████████████████████▎          | 5561/8000 [5:13:35<1:51:30,  2.74s/it]

Episode 5561/8000, real env return = 289.25


 70%|████████████████████████▎          | 5571/8000 [5:14:03<1:53:29,  2.80s/it]

Episode 5571/8000, real env return = 292.48


 70%|████████████████████████▍          | 5581/8000 [5:14:31<1:47:11,  2.66s/it]

Episode 5581/8000, real env return = 287.40


 70%|████████████████████████▍          | 5591/8000 [5:15:00<1:58:23,  2.95s/it]

Episode 5591/8000, real env return = 289.76


 70%|████████████████████████▌          | 5601/8000 [5:15:29<1:53:09,  2.83s/it]

Episode 5601/8000, real env return = 296.18


 70%|████████████████████████▌          | 5611/8000 [5:15:55<1:40:56,  2.54s/it]

Episode 5611/8000, real env return = 294.17


 70%|████████████████████████▌          | 5621/8000 [5:16:23<1:45:53,  2.67s/it]

Episode 5621/8000, real env return = 293.66


 70%|████████████████████████▋          | 5631/8000 [5:16:52<1:56:18,  2.95s/it]

Episode 5631/8000, real env return = 294.23


 71%|████████████████████████▋          | 5641/8000 [5:17:21<1:53:17,  2.88s/it]

Episode 5641/8000, real env return = 296.21


 71%|████████████████████████▋          | 5651/8000 [5:17:50<1:54:47,  2.93s/it]

Episode 5651/8000, real env return = 290.87


 71%|████████████████████████▊          | 5661/8000 [5:18:19<1:52:54,  2.90s/it]

Episode 5661/8000, real env return = 297.55


 71%|████████████████████████▊          | 5671/8000 [5:18:47<1:50:13,  2.84s/it]

Episode 5671/8000, real env return = 293.69


 71%|████████████████████████▊          | 5681/8000 [5:19:15<1:50:56,  2.87s/it]

Episode 5681/8000, real env return = 292.84


 71%|████████████████████████▉          | 5691/8000 [5:19:41<1:39:28,  2.58s/it]

Episode 5691/8000, real env return = 298.38


 71%|████████████████████████▉          | 5701/8000 [5:20:10<1:53:29,  2.96s/it]

Episode 5701/8000, real env return = 297.46


 71%|████████████████████████▉          | 5711/8000 [5:20:38<1:39:06,  2.60s/it]

Episode 5711/8000, real env return = 296.78


 72%|█████████████████████████          | 5721/8000 [5:21:07<1:49:09,  2.87s/it]

Episode 5721/8000, real env return = 297.90


 72%|█████████████████████████          | 5731/8000 [5:21:35<1:46:46,  2.82s/it]

Episode 5731/8000, real env return = 294.41


 72%|█████████████████████████          | 5741/8000 [5:22:02<1:44:57,  2.79s/it]

Episode 5741/8000, real env return = 292.24


 72%|█████████████████████████▏         | 5751/8000 [5:22:31<1:45:28,  2.81s/it]

Episode 5751/8000, real env return = 295.13


 72%|█████████████████████████▏         | 5761/8000 [5:22:57<1:35:42,  2.56s/it]

Episode 5761/8000, real env return = -6.65


 72%|█████████████████████████▏         | 5771/8000 [5:23:24<1:41:09,  2.72s/it]

Episode 5771/8000, real env return = 289.02


 72%|█████████████████████████▎         | 5781/8000 [5:23:52<1:46:02,  2.87s/it]

Episode 5781/8000, real env return = 296.51


 72%|█████████████████████████▎         | 5791/8000 [5:24:20<1:39:52,  2.71s/it]

Episode 5791/8000, real env return = 292.67


 73%|█████████████████████████▍         | 5801/8000 [5:24:47<1:42:15,  2.79s/it]

Episode 5801/8000, real env return = 292.70


 73%|█████████████████████████▍         | 5811/8000 [5:25:14<1:39:05,  2.72s/it]

Episode 5811/8000, real env return = 295.42


 73%|█████████████████████████▍         | 5821/8000 [5:25:40<1:33:51,  2.58s/it]

Episode 5821/8000, real env return = 63.22


 73%|█████████████████████████▌         | 5831/8000 [5:26:07<1:40:40,  2.79s/it]

Episode 5831/8000, real env return = 294.15


 73%|█████████████████████████▌         | 5841/8000 [5:26:35<1:35:53,  2.66s/it]

Episode 5841/8000, real env return = 294.06


 73%|█████████████████████████▌         | 5851/8000 [5:27:05<1:49:02,  3.04s/it]

Episode 5851/8000, real env return = 292.57


 73%|█████████████████████████▋         | 5861/8000 [5:27:33<1:35:52,  2.69s/it]

Episode 5861/8000, real env return = 290.96


 73%|█████████████████████████▋         | 5871/8000 [5:28:01<1:34:44,  2.67s/it]

Episode 5871/8000, real env return = 292.18


 74%|█████████████████████████▋         | 5881/8000 [5:28:28<1:33:27,  2.65s/it]

Episode 5881/8000, real env return = 291.75


 74%|█████████████████████████▊         | 5891/8000 [5:28:58<1:45:49,  3.01s/it]

Episode 5891/8000, real env return = 286.82


 74%|█████████████████████████▊         | 5901/8000 [5:29:25<1:37:12,  2.78s/it]

Episode 5901/8000, real env return = 293.29


 74%|█████████████████████████▊         | 5911/8000 [5:29:51<1:33:01,  2.67s/it]

Episode 5911/8000, real env return = 291.73


 74%|█████████████████████████▉         | 5921/8000 [5:30:19<1:30:49,  2.62s/it]

Episode 5921/8000, real env return = 295.44


 74%|█████████████████████████▉         | 5931/8000 [5:30:47<1:36:00,  2.78s/it]

Episode 5931/8000, real env return = 293.31


 74%|█████████████████████████▉         | 5941/8000 [5:31:16<1:41:20,  2.95s/it]

Episode 5941/8000, real env return = 295.52


 74%|██████████████████████████         | 5951/8000 [5:31:44<1:39:17,  2.91s/it]

Episode 5951/8000, real env return = 295.04


 75%|██████████████████████████         | 5961/8000 [5:32:11<1:32:33,  2.72s/it]

Episode 5961/8000, real env return = 292.90


 75%|██████████████████████████         | 5971/8000 [5:32:38<1:34:09,  2.78s/it]

Episode 5971/8000, real env return = 294.51


 75%|██████████████████████████▏        | 5981/8000 [5:33:05<1:34:06,  2.80s/it]

Episode 5981/8000, real env return = 294.03


 75%|██████████████████████████▏        | 5991/8000 [5:33:33<1:37:09,  2.90s/it]

Episode 5991/8000, real env return = 297.12


 75%|██████████████████████████▎        | 6001/8000 [5:34:01<1:34:03,  2.82s/it]

Episode 6001/8000, real env return = 293.28


 75%|██████████████████████████▎        | 6011/8000 [5:34:30<1:36:02,  2.90s/it]

Episode 6011/8000, real env return = 296.17


 75%|██████████████████████████▎        | 6021/8000 [5:34:57<1:33:10,  2.83s/it]

Episode 6021/8000, real env return = 294.08


 75%|██████████████████████████▍        | 6031/8000 [5:35:24<1:28:12,  2.69s/it]

Episode 6031/8000, real env return = 293.65


 76%|██████████████████████████▍        | 6041/8000 [5:35:51<1:31:25,  2.80s/it]

Episode 6041/8000, real env return = 290.68


 76%|██████████████████████████▍        | 6051/8000 [5:36:16<1:20:47,  2.49s/it]

Episode 6051/8000, real env return = 290.82


 76%|██████████████████████████▌        | 6061/8000 [5:36:44<1:31:45,  2.84s/it]

Episode 6061/8000, real env return = 294.65


 76%|██████████████████████████▌        | 6071/8000 [5:37:12<1:30:28,  2.81s/it]

Episode 6071/8000, real env return = 293.42


 76%|██████████████████████████▌        | 6081/8000 [5:37:39<1:29:11,  2.79s/it]

Episode 6081/8000, real env return = 295.56


 76%|██████████████████████████▋        | 6091/8000 [5:38:06<1:28:39,  2.79s/it]

Episode 6091/8000, real env return = 292.09


 76%|██████████████████████████▋        | 6101/8000 [5:38:33<1:25:11,  2.69s/it]

Episode 6101/8000, real env return = 294.63


 76%|██████████████████████████▋        | 6111/8000 [5:38:59<1:21:31,  2.59s/it]

Episode 6111/8000, real env return = 294.63


 77%|██████████████████████████▊        | 6121/8000 [5:39:25<1:21:50,  2.61s/it]

Episode 6121/8000, real env return = 118.37


 77%|██████████████████████████▊        | 6131/8000 [5:39:50<1:21:44,  2.62s/it]

Episode 6131/8000, real env return = 293.75


 77%|██████████████████████████▊        | 6141/8000 [5:40:18<1:25:40,  2.77s/it]

Episode 6141/8000, real env return = 295.87


 77%|██████████████████████████▉        | 6151/8000 [5:40:46<1:25:33,  2.78s/it]

Episode 6151/8000, real env return = 294.42


 77%|██████████████████████████▉        | 6161/8000 [5:41:12<1:17:23,  2.52s/it]

Episode 6161/8000, real env return = 294.46


 77%|██████████████████████████▉        | 6171/8000 [5:41:39<1:22:59,  2.72s/it]

Episode 6171/8000, real env return = 294.45


 77%|███████████████████████████        | 6181/8000 [5:42:06<1:25:55,  2.83s/it]

Episode 6181/8000, real env return = 297.38


 77%|███████████████████████████        | 6191/8000 [5:42:32<1:20:08,  2.66s/it]

Episode 6191/8000, real env return = 291.05


 78%|███████████████████████████▏       | 6201/8000 [5:43:00<1:24:08,  2.81s/it]

Episode 6201/8000, real env return = 296.60


 78%|███████████████████████████▏       | 6211/8000 [5:43:29<1:28:34,  2.97s/it]

Episode 6211/8000, real env return = 294.69


 78%|███████████████████████████▏       | 6221/8000 [5:43:57<1:25:04,  2.87s/it]

Episode 6221/8000, real env return = 298.14


 78%|███████████████████████████▎       | 6231/8000 [5:44:25<1:23:58,  2.85s/it]

Episode 6231/8000, real env return = 295.45


 78%|███████████████████████████▎       | 6241/8000 [5:44:51<1:22:00,  2.80s/it]

Episode 6241/8000, real env return = 297.71


 78%|███████████████████████████▎       | 6251/8000 [5:45:17<1:10:00,  2.40s/it]

Episode 6251/8000, real env return = 294.54


 78%|███████████████████████████▍       | 6261/8000 [5:45:45<1:23:23,  2.88s/it]

Episode 6261/8000, real env return = 298.28


 78%|███████████████████████████▍       | 6271/8000 [5:46:11<1:15:19,  2.61s/it]

Episode 6271/8000, real env return = 295.90


 79%|███████████████████████████▍       | 6281/8000 [5:46:39<1:16:40,  2.68s/it]

Episode 6281/8000, real env return = 294.38


 79%|███████████████████████████▌       | 6291/8000 [5:47:06<1:20:15,  2.82s/it]

Episode 6291/8000, real env return = 297.34


 79%|███████████████████████████▌       | 6301/8000 [5:47:32<1:14:34,  2.63s/it]

Episode 6301/8000, real env return = 295.53


 79%|███████████████████████████▌       | 6311/8000 [5:48:00<1:22:00,  2.91s/it]

Episode 6311/8000, real env return = 291.52


 79%|███████████████████████████▋       | 6321/8000 [5:48:27<1:18:47,  2.82s/it]

Episode 6321/8000, real env return = 294.59


 79%|███████████████████████████▋       | 6331/8000 [5:48:56<1:17:33,  2.79s/it]

Episode 6331/8000, real env return = 296.66


 79%|███████████████████████████▋       | 6341/8000 [5:49:21<1:12:05,  2.61s/it]

Episode 6341/8000, real env return = 296.80


 79%|███████████████████████████▊       | 6351/8000 [5:49:50<1:16:45,  2.79s/it]

Episode 6351/8000, real env return = 300.01


 80%|███████████████████████████▊       | 6361/8000 [5:50:16<1:15:21,  2.76s/it]

Episode 6361/8000, real env return = 297.66


 80%|███████████████████████████▊       | 6371/8000 [5:50:43<1:11:30,  2.63s/it]

Episode 6371/8000, real env return = 155.47


 80%|███████████████████████████▉       | 6381/8000 [5:51:10<1:09:56,  2.59s/it]

Episode 6381/8000, real env return = 296.25


 80%|███████████████████████████▉       | 6391/8000 [5:51:36<1:14:43,  2.79s/it]

Episode 6391/8000, real env return = 296.99


 80%|████████████████████████████       | 6401/8000 [5:52:05<1:14:31,  2.80s/it]

Episode 6401/8000, real env return = 293.87


 80%|████████████████████████████       | 6411/8000 [5:52:32<1:10:38,  2.67s/it]

Episode 6411/8000, real env return = 293.38


 80%|████████████████████████████       | 6421/8000 [5:52:59<1:12:04,  2.74s/it]

Episode 6421/8000, real env return = 291.68


 80%|████████████████████████████▏      | 6431/8000 [5:53:28<1:11:43,  2.74s/it]

Episode 6431/8000, real env return = 296.78


 81%|████████████████████████████▏      | 6441/8000 [5:53:55<1:11:46,  2.76s/it]

Episode 6441/8000, real env return = 294.93


 81%|████████████████████████████▏      | 6451/8000 [5:54:23<1:11:28,  2.77s/it]

Episode 6451/8000, real env return = 296.63


 81%|████████████████████████████▎      | 6461/8000 [5:54:51<1:14:33,  2.91s/it]

Episode 6461/8000, real env return = 290.90


 81%|████████████████████████████▎      | 6471/8000 [5:55:19<1:12:21,  2.84s/it]

Episode 6471/8000, real env return = 299.85


 81%|████████████████████████████▎      | 6481/8000 [5:55:47<1:08:50,  2.72s/it]

Episode 6481/8000, real env return = 296.22


 81%|████████████████████████████▍      | 6491/8000 [5:56:13<1:05:15,  2.59s/it]

Episode 6491/8000, real env return = 294.38


 81%|████████████████████████████▍      | 6501/8000 [5:56:40<1:07:18,  2.69s/it]

Episode 6501/8000, real env return = 294.28


 81%|████████████████████████████▍      | 6511/8000 [5:57:08<1:07:05,  2.70s/it]

Episode 6511/8000, real env return = 291.90


 82%|████████████████████████████▌      | 6521/8000 [5:57:36<1:10:09,  2.85s/it]

Episode 6521/8000, real env return = 293.70


 82%|████████████████████████████▌      | 6531/8000 [5:58:04<1:08:33,  2.80s/it]

Episode 6531/8000, real env return = 295.03


 82%|████████████████████████████▌      | 6541/8000 [5:58:31<1:07:40,  2.78s/it]

Episode 6541/8000, real env return = 293.74


 82%|████████████████████████████▋      | 6551/8000 [5:58:57<1:06:07,  2.74s/it]

Episode 6551/8000, real env return = 295.65


 82%|████████████████████████████▋      | 6561/8000 [5:59:25<1:10:10,  2.93s/it]

Episode 6561/8000, real env return = 297.01


 82%|████████████████████████████▋      | 6571/8000 [5:59:53<1:04:44,  2.72s/it]

Episode 6571/8000, real env return = 295.25


 82%|████████████████████████████▊      | 6581/8000 [6:00:21<1:02:23,  2.64s/it]

Episode 6581/8000, real env return = 294.22


 82%|████████████████████████████▊      | 6591/8000 [6:00:49<1:04:20,  2.74s/it]

Episode 6591/8000, real env return = 294.32


 83%|████████████████████████████▉      | 6601/8000 [6:01:17<1:06:06,  2.84s/it]

Episode 6601/8000, real env return = 295.89


 83%|████████████████████████████▉      | 6611/8000 [6:01:44<1:02:33,  2.70s/it]

Episode 6611/8000, real env return = 296.96


 83%|████████████████████████████▉      | 6621/8000 [6:02:12<1:00:05,  2.61s/it]

Episode 6621/8000, real env return = 294.79


 83%|█████████████████████████████      | 6631/8000 [6:02:38<1:01:35,  2.70s/it]

Episode 6631/8000, real env return = 294.34


 83%|█████████████████████████████      | 6641/8000 [6:03:05<1:00:33,  2.67s/it]

Episode 6641/8000, real env return = 295.40


 83%|██████████████████████████████▊      | 6651/8000 [6:03:33<59:29,  2.65s/it]

Episode 6651/8000, real env return = 292.89


 83%|█████████████████████████████▏     | 6661/8000 [6:04:00<1:02:01,  2.78s/it]

Episode 6661/8000, real env return = 295.50


 83%|█████████████████████████████▏     | 6671/8000 [6:04:27<1:00:55,  2.75s/it]

Episode 6671/8000, real env return = 295.36


 84%|██████████████████████████████▉      | 6681/8000 [6:04:54<57:58,  2.64s/it]

Episode 6681/8000, real env return = 291.16


 84%|██████████████████████████████▉      | 6691/8000 [6:05:21<57:18,  2.63s/it]

Episode 6691/8000, real env return = 288.97


 84%|█████████████████████████████▎     | 6701/8000 [6:05:49<1:01:26,  2.84s/it]

Episode 6701/8000, real env return = 292.17


 84%|███████████████████████████████      | 6711/8000 [6:06:15<58:35,  2.73s/it]

Episode 6711/8000, real env return = 294.78


 84%|███████████████████████████████      | 6721/8000 [6:06:41<58:28,  2.74s/it]

Episode 6721/8000, real env return = 295.17


 84%|███████████████████████████████▏     | 6731/8000 [6:07:08<56:44,  2.68s/it]

Episode 6731/8000, real env return = 287.06


 84%|███████████████████████████████▏     | 6741/8000 [6:07:35<56:35,  2.70s/it]

Episode 6741/8000, real env return = 296.94


 84%|███████████████████████████████▏     | 6751/8000 [6:08:02<55:21,  2.66s/it]

Episode 6751/8000, real env return = 292.64


 85%|███████████████████████████████▎     | 6761/8000 [6:08:28<55:22,  2.68s/it]

Episode 6761/8000, real env return = 297.10


 85%|███████████████████████████████▎     | 6771/8000 [6:08:54<52:42,  2.57s/it]

Episode 6771/8000, real env return = 289.16


 85%|███████████████████████████████▎     | 6781/8000 [6:09:19<51:44,  2.55s/it]

Episode 6781/8000, real env return = 296.32


 85%|███████████████████████████████▍     | 6791/8000 [6:09:46<53:47,  2.67s/it]

Episode 6791/8000, real env return = 296.38


 85%|███████████████████████████████▍     | 6801/8000 [6:10:13<55:51,  2.80s/it]

Episode 6801/8000, real env return = 295.42


 85%|███████████████████████████████▌     | 6811/8000 [6:10:39<51:07,  2.58s/it]

Episode 6811/8000, real env return = 295.63


 85%|███████████████████████████████▌     | 6821/8000 [6:11:06<51:47,  2.64s/it]

Episode 6821/8000, real env return = 293.12


 85%|███████████████████████████████▌     | 6831/8000 [6:11:32<52:17,  2.68s/it]

Episode 6831/8000, real env return = 296.98


 86%|███████████████████████████████▋     | 6841/8000 [6:11:59<48:40,  2.52s/it]

Episode 6841/8000, real env return = 291.24


 86%|███████████████████████████████▋     | 6851/8000 [6:12:27<53:12,  2.78s/it]

Episode 6851/8000, real env return = 270.15


 86%|███████████████████████████████▋     | 6861/8000 [6:12:53<50:20,  2.65s/it]

Episode 6861/8000, real env return = 295.70


 86%|███████████████████████████████▊     | 6871/8000 [6:13:21<53:32,  2.85s/it]

Episode 6871/8000, real env return = 294.27


 86%|███████████████████████████████▊     | 6881/8000 [6:13:47<46:59,  2.52s/it]

Episode 6881/8000, real env return = 289.09


 86%|███████████████████████████████▊     | 6891/8000 [6:14:12<48:35,  2.63s/it]

Episode 6891/8000, real env return = 294.25


 86%|███████████████████████████████▉     | 6901/8000 [6:14:40<50:10,  2.74s/it]

Episode 6901/8000, real env return = 291.38


 86%|███████████████████████████████▉     | 6911/8000 [6:15:05<46:16,  2.55s/it]

Episode 6911/8000, real env return = 291.39


 87%|████████████████████████████████     | 6921/8000 [6:15:32<46:35,  2.59s/it]

Episode 6921/8000, real env return = 292.63


 87%|████████████████████████████████     | 6931/8000 [6:15:58<46:45,  2.62s/it]

Episode 6931/8000, real env return = 294.05


 87%|████████████████████████████████     | 6941/8000 [6:16:25<48:42,  2.76s/it]

Episode 6941/8000, real env return = 293.47


 87%|████████████████████████████████▏    | 6951/8000 [6:16:51<44:25,  2.54s/it]

Episode 6951/8000, real env return = 293.23


 87%|████████████████████████████████▏    | 6961/8000 [6:17:18<45:38,  2.64s/it]

Episode 6961/8000, real env return = 295.84


 87%|████████████████████████████████▏    | 6971/8000 [6:17:43<45:28,  2.65s/it]

Episode 6971/8000, real env return = 297.06


 87%|████████████████████████████████▎    | 6981/8000 [6:18:10<44:27,  2.62s/it]

Episode 6981/8000, real env return = 291.99


 87%|████████████████████████████████▎    | 6991/8000 [6:18:37<48:26,  2.88s/it]

Episode 6991/8000, real env return = 292.65


 88%|████████████████████████████████▍    | 7001/8000 [6:19:03<45:06,  2.71s/it]

Episode 7001/8000, real env return = 293.53


 88%|████████████████████████████████▍    | 7011/8000 [6:19:32<46:53,  2.85s/it]

Episode 7011/8000, real env return = 293.91


 88%|████████████████████████████████▍    | 7021/8000 [6:19:59<44:42,  2.74s/it]

Episode 7021/8000, real env return = 294.35


 88%|████████████████████████████████▌    | 7031/8000 [6:20:25<42:37,  2.64s/it]

Episode 7031/8000, real env return = 290.79


 88%|████████████████████████████████▌    | 7041/8000 [6:20:51<40:31,  2.54s/it]

Episode 7041/8000, real env return = 293.81


 88%|████████████████████████████████▌    | 7051/8000 [6:21:18<42:29,  2.69s/it]

Episode 7051/8000, real env return = 296.18


 88%|████████████████████████████████▋    | 7061/8000 [6:21:44<37:43,  2.41s/it]

Episode 7061/8000, real env return = 292.71


 88%|████████████████████████████████▋    | 7071/8000 [6:22:11<40:44,  2.63s/it]

Episode 7071/8000, real env return = 296.56


 89%|████████████████████████████████▋    | 7081/8000 [6:22:37<38:22,  2.51s/it]

Episode 7081/8000, real env return = 298.68


 89%|████████████████████████████████▊    | 7091/8000 [6:23:03<37:44,  2.49s/it]

Episode 7091/8000, real env return = 295.48


 89%|████████████████████████████████▊    | 7101/8000 [6:23:30<38:27,  2.57s/it]

Episode 7101/8000, real env return = 295.69


 89%|████████████████████████████████▉    | 7111/8000 [6:23:56<39:52,  2.69s/it]

Episode 7111/8000, real env return = 295.29


 89%|████████████████████████████████▉    | 7121/8000 [6:24:22<38:28,  2.63s/it]

Episode 7121/8000, real env return = 292.62


 89%|████████████████████████████████▉    | 7131/8000 [6:24:48<35:46,  2.47s/it]

Episode 7131/8000, real env return = 292.39


 89%|█████████████████████████████████    | 7141/8000 [6:25:14<36:15,  2.53s/it]

Episode 7141/8000, real env return = 293.74


 89%|█████████████████████████████████    | 7151/8000 [6:25:41<37:52,  2.68s/it]

Episode 7151/8000, real env return = 293.06


 90%|█████████████████████████████████    | 7161/8000 [6:26:07<35:31,  2.54s/it]

Episode 7161/8000, real env return = 288.36


 90%|█████████████████████████████████▏   | 7171/8000 [6:26:33<37:45,  2.73s/it]

Episode 7171/8000, real env return = 291.73


 90%|█████████████████████████████████▏   | 7181/8000 [6:26:58<33:36,  2.46s/it]

Episode 7181/8000, real env return = 80.31


 90%|█████████████████████████████████▎   | 7191/8000 [6:27:25<36:37,  2.72s/it]

Episode 7191/8000, real env return = 287.89


 90%|█████████████████████████████████▎   | 7201/8000 [6:27:51<33:45,  2.53s/it]

Episode 7201/8000, real env return = 290.57


 90%|█████████████████████████████████▎   | 7211/8000 [6:28:18<35:16,  2.68s/it]

Episode 7211/8000, real env return = 291.62


 90%|█████████████████████████████████▍   | 7221/8000 [6:28:44<34:37,  2.67s/it]

Episode 7221/8000, real env return = 286.38


 90%|█████████████████████████████████▍   | 7231/8000 [6:29:10<32:28,  2.53s/it]

Episode 7231/8000, real env return = 293.66


 91%|█████████████████████████████████▍   | 7241/8000 [6:29:37<33:39,  2.66s/it]

Episode 7241/8000, real env return = 287.14


 91%|█████████████████████████████████▌   | 7251/8000 [6:30:04<35:07,  2.81s/it]

Episode 7251/8000, real env return = 289.82


 91%|█████████████████████████████████▌   | 7261/8000 [6:30:29<29:29,  2.39s/it]

Episode 7261/8000, real env return = -82.80


 91%|█████████████████████████████████▋   | 7271/8000 [6:30:56<32:45,  2.70s/it]

Episode 7271/8000, real env return = 287.82


 91%|█████████████████████████████████▋   | 7281/8000 [6:31:22<31:17,  2.61s/it]

Episode 7281/8000, real env return = 287.68


 91%|█████████████████████████████████▋   | 7291/8000 [6:31:49<32:31,  2.75s/it]

Episode 7291/8000, real env return = 288.08


 91%|█████████████████████████████████▊   | 7301/8000 [6:32:15<30:58,  2.66s/it]

Episode 7301/8000, real env return = 288.66


 91%|█████████████████████████████████▊   | 7311/8000 [6:32:41<30:24,  2.65s/it]

Episode 7311/8000, real env return = 287.31


 92%|█████████████████████████████████▊   | 7321/8000 [6:33:09<30:53,  2.73s/it]

Episode 7321/8000, real env return = 288.75


 92%|█████████████████████████████████▉   | 7331/8000 [6:33:35<30:23,  2.73s/it]

Episode 7331/8000, real env return = 287.04


 92%|█████████████████████████████████▉   | 7341/8000 [6:34:02<29:59,  2.73s/it]

Episode 7341/8000, real env return = 290.34


 92%|█████████████████████████████████▉   | 7351/8000 [6:34:28<27:45,  2.57s/it]

Episode 7351/8000, real env return = 287.38


 92%|██████████████████████████████████   | 7361/8000 [6:34:54<29:14,  2.75s/it]

Episode 7361/8000, real env return = 291.63


 92%|██████████████████████████████████   | 7371/8000 [6:35:21<29:20,  2.80s/it]

Episode 7371/8000, real env return = 291.91


 92%|██████████████████████████████████▏  | 7381/8000 [6:35:47<28:44,  2.79s/it]

Episode 7381/8000, real env return = 292.03


 92%|██████████████████████████████████▏  | 7391/8000 [6:36:13<25:14,  2.49s/it]

Episode 7391/8000, real env return = 292.06


 93%|██████████████████████████████████▏  | 7401/8000 [6:36:40<27:09,  2.72s/it]

Episode 7401/8000, real env return = 291.09


 93%|██████████████████████████████████▎  | 7411/8000 [6:37:07<25:49,  2.63s/it]

Episode 7411/8000, real env return = 292.31


 93%|██████████████████████████████████▎  | 7421/8000 [6:37:34<25:21,  2.63s/it]

Episode 7421/8000, real env return = 292.37


 93%|██████████████████████████████████▎  | 7431/8000 [6:38:01<27:15,  2.87s/it]

Episode 7431/8000, real env return = 287.95


 93%|██████████████████████████████████▍  | 7441/8000 [6:38:28<25:34,  2.74s/it]

Episode 7441/8000, real env return = 294.88


 93%|██████████████████████████████████▍  | 7451/8000 [6:38:55<23:33,  2.57s/it]

Episode 7451/8000, real env return = 292.84


 93%|██████████████████████████████████▌  | 7461/8000 [6:39:20<23:00,  2.56s/it]

Episode 7461/8000, real env return = 289.65


 93%|██████████████████████████████████▌  | 7471/8000 [6:39:48<24:17,  2.75s/it]

Episode 7471/8000, real env return = 291.24


 94%|██████████████████████████████████▌  | 7481/8000 [6:40:14<22:38,  2.62s/it]

Episode 7481/8000, real env return = 288.16


 94%|██████████████████████████████████▋  | 7491/8000 [6:40:39<21:41,  2.56s/it]

Episode 7491/8000, real env return = 288.83


 94%|██████████████████████████████████▋  | 7501/8000 [6:41:05<20:59,  2.52s/it]

Episode 7501/8000, real env return = 290.10


 94%|██████████████████████████████████▋  | 7511/8000 [6:41:31<22:06,  2.71s/it]

Episode 7511/8000, real env return = 290.36


 94%|██████████████████████████████████▊  | 7521/8000 [6:41:57<20:00,  2.51s/it]

Episode 7521/8000, real env return = 291.92


 94%|██████████████████████████████████▊  | 7531/8000 [6:42:23<19:40,  2.52s/it]

Episode 7531/8000, real env return = 288.40


 94%|██████████████████████████████████▉  | 7541/8000 [6:42:48<20:41,  2.71s/it]

Episode 7541/8000, real env return = 291.60


 94%|██████████████████████████████████▉  | 7551/8000 [6:43:12<16:59,  2.27s/it]

Episode 7551/8000, real env return = 289.47


 95%|██████████████████████████████████▉  | 7561/8000 [6:43:37<17:55,  2.45s/it]

Episode 7561/8000, real env return = 288.68


 95%|███████████████████████████████████  | 7571/8000 [6:44:03<19:31,  2.73s/it]

Episode 7571/8000, real env return = 287.61


 95%|███████████████████████████████████  | 7581/8000 [6:44:29<17:33,  2.51s/it]

Episode 7581/8000, real env return = 289.22


 95%|███████████████████████████████████  | 7591/8000 [6:44:55<16:57,  2.49s/it]

Episode 7591/8000, real env return = 289.45


 95%|███████████████████████████████████▏ | 7601/8000 [6:45:20<17:28,  2.63s/it]

Episode 7601/8000, real env return = 290.76


 95%|███████████████████████████████████▏ | 7611/8000 [6:45:46<16:25,  2.53s/it]

Episode 7611/8000, real env return = 292.18


 95%|███████████████████████████████████▏ | 7621/8000 [6:46:11<16:08,  2.56s/it]

Episode 7621/8000, real env return = 289.39


 95%|███████████████████████████████████▎ | 7631/8000 [6:46:35<15:13,  2.48s/it]

Episode 7631/8000, real env return = 292.07


 96%|███████████████████████████████████▎ | 7641/8000 [6:47:02<15:42,  2.63s/it]

Episode 7641/8000, real env return = 291.84


 96%|███████████████████████████████████▍ | 7651/8000 [6:47:28<15:15,  2.62s/it]

Episode 7651/8000, real env return = 286.00


 96%|███████████████████████████████████▍ | 7661/8000 [6:47:53<14:27,  2.56s/it]

Episode 7661/8000, real env return = 292.07


 96%|███████████████████████████████████▍ | 7671/8000 [6:48:19<14:12,  2.59s/it]

Episode 7671/8000, real env return = 294.65


 96%|███████████████████████████████████▌ | 7681/8000 [6:48:46<15:41,  2.95s/it]

Episode 7681/8000, real env return = 289.91


 96%|███████████████████████████████████▌ | 7691/8000 [6:49:14<14:53,  2.89s/it]

Episode 7691/8000, real env return = 292.81


 96%|███████████████████████████████████▌ | 7701/8000 [6:49:41<13:36,  2.73s/it]

Episode 7701/8000, real env return = 293.68


 96%|███████████████████████████████████▋ | 7711/8000 [6:50:06<12:58,  2.69s/it]

Episode 7711/8000, real env return = 294.41


 97%|███████████████████████████████████▋ | 7721/8000 [6:50:32<12:44,  2.74s/it]

Episode 7721/8000, real env return = 291.51


 97%|███████████████████████████████████▊ | 7731/8000 [6:50:58<11:53,  2.65s/it]

Episode 7731/8000, real env return = 290.25


 97%|███████████████████████████████████▊ | 7741/8000 [6:51:26<11:48,  2.73s/it]

Episode 7741/8000, real env return = 293.47


 97%|███████████████████████████████████▊ | 7751/8000 [6:51:52<11:05,  2.67s/it]

Episode 7751/8000, real env return = 294.69


 97%|███████████████████████████████████▉ | 7761/8000 [6:52:20<11:28,  2.88s/it]

Episode 7761/8000, real env return = 294.60


 97%|███████████████████████████████████▉ | 7771/8000 [6:52:50<10:44,  2.82s/it]

Episode 7771/8000, real env return = 289.59


 97%|███████████████████████████████████▉ | 7781/8000 [6:53:18<10:02,  2.75s/it]

Episode 7781/8000, real env return = 292.85


 97%|████████████████████████████████████ | 7791/8000 [6:53:46<09:33,  2.74s/it]

Episode 7791/8000, real env return = 287.75


 98%|████████████████████████████████████ | 7801/8000 [6:54:13<08:53,  2.68s/it]

Episode 7801/8000, real env return = 288.60


 98%|████████████████████████████████████▏| 7811/8000 [6:54:41<08:33,  2.72s/it]

Episode 7811/8000, real env return = 291.83


 98%|████████████████████████████████████▏| 7821/8000 [6:55:06<07:31,  2.52s/it]

Episode 7821/8000, real env return = 292.07


 98%|████████████████████████████████████▏| 7831/8000 [6:55:34<07:38,  2.71s/it]

Episode 7831/8000, real env return = 293.23


 98%|████████████████████████████████████▎| 7841/8000 [6:56:03<07:37,  2.88s/it]

Episode 7841/8000, real env return = 292.74


 98%|████████████████████████████████████▎| 7851/8000 [6:56:30<06:58,  2.81s/it]

Episode 7851/8000, real env return = 292.27


 98%|████████████████████████████████████▎| 7861/8000 [6:56:56<06:31,  2.82s/it]

Episode 7861/8000, real env return = 290.19


 98%|████████████████████████████████████▍| 7871/8000 [6:57:23<05:39,  2.63s/it]

Episode 7871/8000, real env return = 292.19


 99%|████████████████████████████████████▍| 7881/8000 [6:57:50<05:17,  2.67s/it]

Episode 7881/8000, real env return = 293.72


 99%|████████████████████████████████████▍| 7891/8000 [6:58:16<04:38,  2.56s/it]

Episode 7891/8000, real env return = 292.12


 99%|████████████████████████████████████▌| 7901/8000 [6:58:42<04:22,  2.65s/it]

Episode 7901/8000, real env return = 293.85


 99%|████████████████████████████████████▌| 7911/8000 [6:59:08<03:53,  2.62s/it]

Episode 7911/8000, real env return = 290.01


 99%|████████████████████████████████████▋| 7921/8000 [6:59:34<03:27,  2.62s/it]

Episode 7921/8000, real env return = 293.90


 99%|████████████████████████████████████▋| 7931/8000 [7:00:00<02:50,  2.47s/it]

Episode 7931/8000, real env return = 293.91


 99%|████████████████████████████████████▋| 7941/8000 [7:00:27<02:38,  2.68s/it]

Episode 7941/8000, real env return = 293.37


 99%|████████████████████████████████████▊| 7951/8000 [7:00:55<02:20,  2.86s/it]

Episode 7951/8000, real env return = 291.62


100%|████████████████████████████████████▊| 7961/8000 [7:01:22<01:48,  2.79s/it]

Episode 7961/8000, real env return = 292.10


100%|████████████████████████████████████▊| 7971/8000 [7:01:50<01:21,  2.80s/it]

Episode 7971/8000, real env return = 291.11


100%|████████████████████████████████████▉| 7981/8000 [7:02:19<00:54,  2.86s/it]

Episode 7981/8000, real env return = 292.47


100%|████████████████████████████████████▉| 7991/8000 [7:02:46<00:24,  2.76s/it]

Episode 7991/8000, real env return = 294.76


100%|█████████████████████████████████████| 8000/8000 [7:03:11<00:00,  3.17s/it]


Training finished.
