In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
示例：使用Stable Baselines3的SAC，并结合随机返回分解(RRD)来处理稀疏/延迟奖励环境。
代码仅用于演示核心思路，可能无法完全复现论文结果。
"""

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

from stable_baselines3 import SAC
from stable_baselines3.sac.policies import SACPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.logger import configure


# ========== 1. 环境包装：将每步奖励改为0，只在episode结束时返回总reward ==========
class EpisodicRewardWrapper(gym.RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.cumulative_reward = 0.0

    def step(self, action):
        result = self.env.step(action)
        # 支持返回4个或5个值
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            obs, reward, done, info = result
        self.cumulative_reward += reward
        if done:
            reward = self.cumulative_reward
            self.cumulative_reward = 0.0
        else:
            reward = 0.0
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.cumulative_reward = 0.0
        result = self.env.reset(**kwargs)
        # 支持返回tuple (obs, info) 或单个obs
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        return obs


# ========== 2. 轨迹存储（为了做 RRD） ==========
class TrajectoryReplay:
    """
    存储一条完整轨迹 (s, a, r, s_next, done)，
    其中 r 是“环境最终给出的总回报”（只有最后一步非0），其余为0。
    """
    def __init__(self):
        self.trajectories = []

    def add_trajectory(self, traj):
        """
        traj: list of (s, a, r, s_next, done)
        """
        self.trajectories.append(traj)

    def sample(self, batch_size):
        """
        随机采样batch_size条轨迹
        """
        indices = np.random.randint(0, len(self.trajectories), size=batch_size)
        return [self.trajectories[i] for i in indices]

    def __len__(self):
        return len(self.trajectories)


# ========== 3. 代理奖励模型： R(s,a) ==========
class RewardModel(nn.Module):
    """
    一个简单的两层网络，用于近似代理奖励 R(s,a)。
    """
    def __init__(self, state_dim, action_dim, hidden_size=256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + action_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, s, a):
        x = torch.cat([s, a], dim=-1)
        return self.net(x)


# ========== 4. RRD训练函数：随机返回分解 ==========
def sample_subsequence_indices(traj_length, K):
    """
    从 [0, traj_length-1] 中随机选择 K 个不重复的索引
    """
    if K > traj_length:
        K = traj_length
    return np.random.choice(traj_length, size=K, replace=False)

def train_reward_model_rrd(reward_model, optimizer, trajectories, K=64, device='cpu'):
    """
    使用随机返回分解(RRD)来训练代理奖励模型。
    其中 trajectories 是一个batch的完整轨迹列表。
    """
    reward_model.train()
    M = len(trajectories)

    loss_sum = 0.0
    for traj in trajectories:
        # 轨迹的总回报 = 轨迹最后一步的reward(因为中间为0，最后一步为总和)
        R_ep = sum([t[2] for t in traj])  # 或者直接 traj[-1][2]

        # 收集这条轨迹的(s, a)
        s_list = []
        a_list = []
        for (s, a, r, s_next, done) in traj:
            s_list.append(s)
            a_list.append(a)

        states = torch.tensor(s_list, dtype=torch.float, device=device)
        actions = torch.tensor(a_list, dtype=torch.float, device=device)

        # 随机采样子序列
        Tj = len(traj)
        idx_subseq = sample_subsequence_indices(Tj, K)

        # 计算子序列上 R(s,a) 之和
        R_sa_sub = reward_model(states[idx_subseq], actions[idx_subseq])
        sum_R_sa_sub = R_sa_sub.sum()

        # (R_ep - sum_R_sa_sub)^2
        diff = (R_ep - sum_R_sa_sub) ** 2
        loss_sum += diff

    loss = loss_sum / M
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()


# ========== 5. 收集轨迹的函数：使用当前策略进行若干回合采样 ==========
def collect_episodes(env, model, n_episodes, device='cpu'):
    trajectories = []
    for _ in range(n_episodes):
        result = env.reset()
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        done = False
        traj = []
        while not done:
            if obs is None or (hasattr(obs, '__len__') and len(obs) == 0):
                raise ValueError("Observation is empty, check env.reset() output")
            obs_tensor = torch.tensor(obs, dtype=torch.float, device=device).unsqueeze(0)
            with torch.no_grad():
                action, _states = model.predict(obs_tensor.cpu().numpy(), deterministic=False)
            result = env.step(action[0])
            if len(result) == 5:
                next_obs, reward, terminated, truncated, info = result
                done = terminated or truncated
            else:
                next_obs, reward, done, info = result
            traj.append((obs, action[0], reward, next_obs, done))
            obs = next_obs
        trajectories.append(traj)
    return trajectories


# ========== 6. 将RRD后的代理奖励存入SAC的ReplayBuffer ==========
def add_shaped_transitions_to_replay(model, reward_model, trajectories, gamma=0.99, device='cpu'):
    """
    对收集到的轨迹，用reward_model计算每步的代理奖励，然后存入SAC的replay buffer。
    """
    for traj in trajectories:
        states = []
        actions = []
        next_states = []
        dones = []
        for (s, a, r, s_next, d) in traj:
            states.append(s)
            actions.append(a)
            next_states.append(s_next)
            dones.append(d)

        # 转成张量
        s_tensor = torch.tensor(states, dtype=torch.float, device=device)
        a_tensor = torch.tensor(actions, dtype=torch.float, device=device)

        with torch.no_grad():
            shaped_rewards = reward_model(s_tensor, a_tensor).cpu().numpy().squeeze(-1)

        for i in range(len(traj)):
            s, a, _, s_next, d = traj[i]
            r_shaped = shaped_rewards[i]
            model.replay_buffer.add(
                s, s_next, a, r_shaped, d, infos=[{}],
            )




# ========== 7. 主函数：整合以上模块 ==========
def main():
    class Args:
        env = "HalfCheetah-v4"
        episodes = 2000
        steps_per_update = 1000
        rrd_k = 64
        device = "cuda"
    
    args = Args()

    device = torch.device(args.device)

    # 1. 创建环境 (只在最后一步返回总reward)
    base_env = gym.make(args.env)
    env = EpisodicRewardWrapper(base_env)
    # stable-baselines3 要求环境是VecEnv，故用 DummyVecEnv 包一下
    vec_env = DummyVecEnv([lambda: env])

    # 2. 创建SAC模型
    #    注意：此时环境返回的奖励是稀疏且延迟的，但我们并不直接用它来训练SAC。
    #    我们会手动往 replay_buffer 里塞“代理奖励”。
    model = SAC(
        policy="MlpPolicy",
        env=vec_env,  # 这里仍然要传env，但不会直接用它的奖励来学习
        verbose=1,
        seed=42,
        buffer_size=100000,
        learning_starts=0,       # 让它不必等待就能开始学习
        train_freq=1,            # 每次加一个transition就可以触发一次训练(后面我们自定义逻辑)
        gradient_steps=0,        # 我们会手动调用 train
        batch_size=256,
        gamma=0.99,
        device=device
    )

    model._logger = configure(folder=None, format_strings=["stdout"])

    # 3. 创建奖励模型
    #    需要知道 state_dim, action_dim
    state_dim = base_env.observation_space.shape[0]
    action_dim = base_env.action_space.shape[0]
    reward_model = RewardModel(state_dim, action_dim, hidden_size=256).to(device)
    reward_optimizer = optim.Adam(reward_model.parameters(), lr=3e-4)

    # 4. 收集轨迹 & RRD & SAC循环
    #    在每个回合：
    #      (a) 用当前策略收集一些episode
    #      (b) 用R(·)更新 reward_model (RRD)
    #      (c) 用更新后的 R(·) 生成 shaped reward，存入SAC的 replay buffer
    #      (d) 调用 model.train(gradient_steps=...) 进行SAC更新
    traj_replay = TrajectoryReplay()

    ep_record = []
    for ep in tqdm(range(args.episodes)):
        # (a) 收集1条或多条轨迹
        new_trajectories = collect_episodes(env, model, n_episodes=1, device=device)
        # 放入自定义的trajectory存储中
        for traj in new_trajectories:
            traj_replay.add_trajectory(traj)

        # (b) 如果轨迹数足够，就训练reward_model
        if len(traj_replay) >= 8:  # 大于一定数量再开始
            batch_size_trajectories = 8
            sampled_trajs = traj_replay.sample(batch_size_trajectories)
            rrd_loss = train_reward_model_rrd(
                reward_model, reward_optimizer, sampled_trajs,
                K=args.rrd_k, device=device
            )

        # (c) 将这些新轨迹用新的 reward_model 计算 shaped reward，添加到 SAC replay buffer
        add_shaped_transitions_to_replay(model, reward_model, new_trajectories, device=device)

        # (d) 调用 model.train(gradient_steps=...) 更新SAC
        #     这里的 steps-per-update 可视情况而定
        if len(traj_replay) >= 8:
            model.train(args.steps_per_update)

        # (e) 简单的评估：看一下最后一条trajectory的真实环境回报
        ep_return = sum([t[2] for t in new_trajectories[-1]])
        ep_record.append(ep_return)
        
        if ep % 10 == 0:
            print(f"Episode {ep+1}/{args.episodes}, real env return = {ep_return:.2f}")

    # 训练结束
    print("Training finished.")
    return ep_record

In [2]:
train1 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<24:53,  1.34it/s]

Episode 1/2000, real env return = -236.78


  1%|▏                                      | 11/2000 [00:25<2:16:54,  4.13s/it]

Episode 11/2000, real env return = -257.93


  1%|▍                                      | 21/2000 [01:16<2:46:32,  5.05s/it]

Episode 21/2000, real env return = -358.63


  2%|▌                                      | 31/2000 [02:09<2:52:38,  5.26s/it]

Episode 31/2000, real env return = -351.09


  2%|▊                                      | 41/2000 [03:01<2:49:57,  5.21s/it]

Episode 41/2000, real env return = -493.40


  3%|▉                                      | 51/2000 [03:53<2:48:04,  5.17s/it]

Episode 51/2000, real env return = -490.65


  3%|█▏                                     | 61/2000 [04:45<2:46:40,  5.16s/it]

Episode 61/2000, real env return = -291.96


  4%|█▍                                     | 71/2000 [05:37<2:48:38,  5.25s/it]

Episode 71/2000, real env return = 397.00


  4%|█▌                                     | 81/2000 [06:29<2:45:29,  5.17s/it]

Episode 81/2000, real env return = -419.96


  5%|█▊                                     | 91/2000 [07:21<2:44:01,  5.16s/it]

Episode 91/2000, real env return = -391.46


  5%|█▉                                    | 101/2000 [08:12<2:41:35,  5.11s/it]

Episode 101/2000, real env return = -381.46


  6%|██                                    | 111/2000 [09:04<2:41:38,  5.13s/it]

Episode 111/2000, real env return = -326.89


  6%|██▎                                   | 121/2000 [09:56<2:42:16,  5.18s/it]

Episode 121/2000, real env return = -237.98


  7%|██▍                                   | 131/2000 [10:48<2:41:55,  5.20s/it]

Episode 131/2000, real env return = -226.67


  7%|██▋                                   | 141/2000 [11:40<2:42:26,  5.24s/it]

Episode 141/2000, real env return = -290.78


  8%|██▊                                   | 151/2000 [12:32<2:40:57,  5.22s/it]

Episode 151/2000, real env return = -159.78


  8%|███                                   | 161/2000 [13:24<2:38:22,  5.17s/it]

Episode 161/2000, real env return = -291.04


  9%|███▏                                  | 171/2000 [14:17<2:37:54,  5.18s/it]

Episode 171/2000, real env return = -256.01


  9%|███▍                                  | 181/2000 [15:09<2:37:16,  5.19s/it]

Episode 181/2000, real env return = -345.45


 10%|███▋                                  | 191/2000 [16:01<2:35:32,  5.16s/it]

Episode 191/2000, real env return = -313.51


 10%|███▊                                  | 201/2000 [16:53<2:34:48,  5.16s/it]

Episode 201/2000, real env return = -317.55


 11%|████                                  | 211/2000 [17:45<2:34:48,  5.19s/it]

Episode 211/2000, real env return = -332.53


 11%|████▏                                 | 221/2000 [18:37<2:34:42,  5.22s/it]

Episode 221/2000, real env return = -318.48


 12%|████▍                                 | 231/2000 [19:28<2:29:42,  5.08s/it]

Episode 231/2000, real env return = -305.73


 12%|████▌                                 | 241/2000 [20:20<2:30:15,  5.13s/it]

Episode 241/2000, real env return = -313.87


 13%|████▊                                 | 251/2000 [21:11<2:29:45,  5.14s/it]

Episode 251/2000, real env return = -291.02


 13%|████▉                                 | 261/2000 [22:03<2:28:50,  5.14s/it]

Episode 261/2000, real env return = -265.58


 14%|█████▏                                | 271/2000 [22:54<2:28:16,  5.15s/it]

Episode 271/2000, real env return = -371.82


 14%|█████▎                                | 281/2000 [23:46<2:27:15,  5.14s/it]

Episode 281/2000, real env return = -308.44


 15%|█████▌                                | 291/2000 [24:38<2:27:29,  5.18s/it]

Episode 291/2000, real env return = -303.23


 15%|█████▋                                | 301/2000 [25:29<2:26:17,  5.17s/it]

Episode 301/2000, real env return = -278.55


 16%|█████▉                                | 311/2000 [26:20<2:26:35,  5.21s/it]

Episode 311/2000, real env return = -110.89


 16%|██████                                | 321/2000 [27:12<2:24:29,  5.16s/it]

Episode 321/2000, real env return = -280.16


 17%|██████▎                               | 331/2000 [28:03<2:22:16,  5.12s/it]

Episode 331/2000, real env return = -166.66


 17%|██████▍                               | 341/2000 [28:54<2:20:57,  5.10s/it]

Episode 341/2000, real env return = -64.11


 18%|██████▋                               | 351/2000 [29:46<2:21:26,  5.15s/it]

Episode 351/2000, real env return = -121.12


 18%|██████▊                               | 361/2000 [30:37<2:19:56,  5.12s/it]

Episode 361/2000, real env return = -257.17


 19%|███████                               | 371/2000 [31:28<2:18:16,  5.09s/it]

Episode 371/2000, real env return = -112.20


 19%|███████▏                              | 381/2000 [32:19<2:17:26,  5.09s/it]

Episode 381/2000, real env return = -139.28


 20%|███████▍                              | 391/2000 [33:11<2:17:47,  5.14s/it]

Episode 391/2000, real env return = -86.38


 20%|███████▌                              | 401/2000 [34:02<2:16:18,  5.11s/it]

Episode 401/2000, real env return = -132.10


 21%|███████▊                              | 411/2000 [34:54<2:15:47,  5.13s/it]

Episode 411/2000, real env return = -96.80


 21%|███████▉                              | 421/2000 [35:45<2:15:30,  5.15s/it]

Episode 421/2000, real env return = -163.29


 22%|████████▏                             | 431/2000 [36:37<2:16:53,  5.24s/it]

Episode 431/2000, real env return = -244.08


 22%|████████▍                             | 441/2000 [37:28<2:13:41,  5.14s/it]

Episode 441/2000, real env return = -237.21


 23%|████████▌                             | 451/2000 [38:20<2:13:46,  5.18s/it]

Episode 451/2000, real env return = -240.94


 23%|████████▊                             | 461/2000 [39:11<2:11:24,  5.12s/it]

Episode 461/2000, real env return = -228.09


 24%|████████▉                             | 471/2000 [40:03<2:10:24,  5.12s/it]

Episode 471/2000, real env return = -160.66


 24%|█████████▏                            | 481/2000 [40:54<2:11:04,  5.18s/it]

Episode 481/2000, real env return = -109.19


 25%|█████████▎                            | 491/2000 [41:45<2:09:15,  5.14s/it]

Episode 491/2000, real env return = -149.84


 25%|█████████▌                            | 501/2000 [42:36<2:08:57,  5.16s/it]

Episode 501/2000, real env return = -148.95


 26%|█████████▋                            | 511/2000 [43:27<2:06:18,  5.09s/it]

Episode 511/2000, real env return = -172.99


 26%|█████████▉                            | 521/2000 [44:19<2:06:09,  5.12s/it]

Episode 521/2000, real env return = -149.60


 27%|██████████                            | 531/2000 [45:10<2:04:55,  5.10s/it]

Episode 531/2000, real env return = -136.59


 27%|██████████▎                           | 541/2000 [46:01<2:04:34,  5.12s/it]

Episode 541/2000, real env return = -187.00


 28%|██████████▍                           | 551/2000 [46:52<2:02:53,  5.09s/it]

Episode 551/2000, real env return = -278.97


 28%|██████████▋                           | 561/2000 [47:44<2:01:50,  5.08s/it]

Episode 561/2000, real env return = -268.68


 29%|██████████▊                           | 571/2000 [48:35<2:01:50,  5.12s/it]

Episode 571/2000, real env return = -251.34


 29%|███████████                           | 581/2000 [49:26<2:00:32,  5.10s/it]

Episode 581/2000, real env return = -256.13


 30%|███████████▏                          | 591/2000 [50:18<2:00:23,  5.13s/it]

Episode 591/2000, real env return = -247.75


 30%|███████████▍                          | 601/2000 [51:09<1:59:01,  5.10s/it]

Episode 601/2000, real env return = -252.18


 31%|███████████▌                          | 611/2000 [52:00<1:58:00,  5.10s/it]

Episode 611/2000, real env return = -258.61


 31%|███████████▊                          | 621/2000 [52:51<2:01:13,  5.27s/it]

Episode 621/2000, real env return = -210.77


 32%|███████████▉                          | 631/2000 [53:43<1:58:13,  5.18s/it]

Episode 631/2000, real env return = -152.15


 32%|████████████▏                         | 641/2000 [54:34<1:57:23,  5.18s/it]

Episode 641/2000, real env return = -278.76


 33%|████████████▎                         | 651/2000 [55:26<1:56:39,  5.19s/it]

Episode 651/2000, real env return = -198.19


 33%|████████████▌                         | 661/2000 [56:17<1:54:29,  5.13s/it]

Episode 661/2000, real env return = -225.04


 34%|████████████▋                         | 671/2000 [57:09<1:54:55,  5.19s/it]

Episode 671/2000, real env return = 84.46


 34%|████████████▉                         | 681/2000 [58:00<1:52:09,  5.10s/it]

Episode 681/2000, real env return = 49.60


 35%|█████████████▏                        | 691/2000 [58:51<1:52:06,  5.14s/it]

Episode 691/2000, real env return = -53.45


 35%|█████████████▎                        | 701/2000 [59:43<1:52:20,  5.19s/it]

Episode 701/2000, real env return = 568.22


 36%|████████████▊                       | 711/2000 [1:00:34<1:49:09,  5.08s/it]

Episode 711/2000, real env return = 840.98


 36%|████████████▉                       | 721/2000 [1:01:25<1:48:21,  5.08s/it]

Episode 721/2000, real env return = 687.38


 37%|█████████████▏                      | 731/2000 [1:02:17<1:50:16,  5.21s/it]

Episode 731/2000, real env return = 368.27


 37%|█████████████▎                      | 741/2000 [1:03:08<1:47:48,  5.14s/it]

Episode 741/2000, real env return = 479.38


 38%|█████████████▌                      | 751/2000 [1:04:00<1:46:53,  5.14s/it]

Episode 751/2000, real env return = 573.43


 38%|█████████████▋                      | 761/2000 [1:04:51<1:46:19,  5.15s/it]

Episode 761/2000, real env return = 1455.34


 39%|█████████████▉                      | 771/2000 [1:05:42<1:44:41,  5.11s/it]

Episode 771/2000, real env return = 1992.37


 39%|██████████████                      | 781/2000 [1:06:33<1:44:52,  5.16s/it]

Episode 781/2000, real env return = 2514.16


 40%|██████████████▏                     | 791/2000 [1:07:25<1:43:54,  5.16s/it]

Episode 791/2000, real env return = 2478.15


 40%|██████████████▍                     | 801/2000 [1:08:16<1:43:32,  5.18s/it]

Episode 801/2000, real env return = 3047.12


 41%|██████████████▌                     | 811/2000 [1:09:07<1:41:34,  5.13s/it]

Episode 811/2000, real env return = 2896.85


 41%|██████████████▊                     | 821/2000 [1:09:58<1:40:08,  5.10s/it]

Episode 821/2000, real env return = 3273.69


 42%|██████████████▉                     | 831/2000 [1:10:49<1:39:08,  5.09s/it]

Episode 831/2000, real env return = 3447.40


 42%|███████████████▏                    | 841/2000 [1:11:41<1:38:15,  5.09s/it]

Episode 841/2000, real env return = 3615.07


 43%|███████████████▎                    | 851/2000 [1:12:32<1:37:41,  5.10s/it]

Episode 851/2000, real env return = 3851.80


 43%|███████████████▍                    | 861/2000 [1:13:23<1:37:35,  5.14s/it]

Episode 861/2000, real env return = 3611.56


 44%|███████████████▋                    | 871/2000 [1:14:15<1:37:16,  5.17s/it]

Episode 871/2000, real env return = 3774.96


 44%|███████████████▊                    | 881/2000 [1:15:06<1:35:59,  5.15s/it]

Episode 881/2000, real env return = 3918.01


 45%|████████████████                    | 891/2000 [1:15:57<1:34:50,  5.13s/it]

Episode 891/2000, real env return = 3825.07


 45%|████████████████▏                   | 901/2000 [1:16:49<1:34:32,  5.16s/it]

Episode 901/2000, real env return = 4133.36


 46%|████████████████▍                   | 911/2000 [1:17:41<1:33:48,  5.17s/it]

Episode 911/2000, real env return = 4017.52


 46%|████████████████▌                   | 921/2000 [1:18:32<1:33:35,  5.20s/it]

Episode 921/2000, real env return = 4504.50


 47%|████████████████▊                   | 931/2000 [1:19:23<1:32:27,  5.19s/it]

Episode 931/2000, real env return = 4482.66


 47%|████████████████▉                   | 941/2000 [1:20:15<1:30:39,  5.14s/it]

Episode 941/2000, real env return = 4585.78


 48%|█████████████████                   | 951/2000 [1:21:06<1:29:13,  5.10s/it]

Episode 951/2000, real env return = 4838.25


 48%|█████████████████▎                  | 961/2000 [1:21:58<1:29:03,  5.14s/it]

Episode 961/2000, real env return = 4819.63


 49%|█████████████████▍                  | 971/2000 [1:22:49<1:27:20,  5.09s/it]

Episode 971/2000, real env return = 4675.77


 49%|█████████████████▋                  | 981/2000 [1:23:40<1:26:06,  5.07s/it]

Episode 981/2000, real env return = 4663.08


 50%|█████████████████▊                  | 991/2000 [1:24:31<1:26:57,  5.17s/it]

Episode 991/2000, real env return = 4844.33


 50%|█████████████████▌                 | 1001/2000 [1:25:22<1:24:51,  5.10s/it]

Episode 1001/2000, real env return = 5075.90


 51%|█████████████████▋                 | 1011/2000 [1:26:13<1:24:31,  5.13s/it]

Episode 1011/2000, real env return = 4978.97


 51%|█████████████████▊                 | 1021/2000 [1:27:04<1:23:28,  5.12s/it]

Episode 1021/2000, real env return = 4376.30


 52%|██████████████████                 | 1031/2000 [1:27:55<1:23:00,  5.14s/it]

Episode 1031/2000, real env return = 5393.06


 52%|██████████████████▏                | 1041/2000 [1:28:46<1:21:58,  5.13s/it]

Episode 1041/2000, real env return = 5283.04


 53%|██████████████████▍                | 1051/2000 [1:29:37<1:19:33,  5.03s/it]

Episode 1051/2000, real env return = 5371.54


 53%|██████████████████▌                | 1061/2000 [1:30:28<1:19:12,  5.06s/it]

Episode 1061/2000, real env return = 5214.77


 54%|██████████████████▋                | 1071/2000 [1:31:19<1:17:58,  5.04s/it]

Episode 1071/2000, real env return = 5374.34


 54%|██████████████████▉                | 1081/2000 [1:32:10<1:17:59,  5.09s/it]

Episode 1081/2000, real env return = 5554.64


 55%|███████████████████                | 1091/2000 [1:33:01<1:17:25,  5.11s/it]

Episode 1091/2000, real env return = 5190.72


 55%|███████████████████▎               | 1101/2000 [1:33:52<1:16:51,  5.13s/it]

Episode 1101/2000, real env return = 5497.16


 56%|███████████████████▍               | 1111/2000 [1:34:44<1:15:45,  5.11s/it]

Episode 1111/2000, real env return = 5276.06


 56%|███████████████████▌               | 1121/2000 [1:35:35<1:15:10,  5.13s/it]

Episode 1121/2000, real env return = 5706.34


 57%|███████████████████▊               | 1131/2000 [1:36:26<1:14:52,  5.17s/it]

Episode 1131/2000, real env return = 5770.64


 57%|███████████████████▉               | 1141/2000 [1:37:17<1:13:38,  5.14s/it]

Episode 1141/2000, real env return = 5789.39


 58%|████████████████████▏              | 1151/2000 [1:38:08<1:12:18,  5.11s/it]

Episode 1151/2000, real env return = 5579.17


 58%|████████████████████▎              | 1161/2000 [1:38:59<1:11:38,  5.12s/it]

Episode 1161/2000, real env return = 5493.66


 59%|████████████████████▍              | 1171/2000 [1:39:50<1:10:27,  5.10s/it]

Episode 1171/2000, real env return = 5738.10


 59%|████████████████████▋              | 1181/2000 [1:40:41<1:08:56,  5.05s/it]

Episode 1181/2000, real env return = 5554.40


 60%|████████████████████▊              | 1191/2000 [1:41:33<1:09:01,  5.12s/it]

Episode 1191/2000, real env return = 5281.65


 60%|█████████████████████              | 1201/2000 [1:42:24<1:08:15,  5.13s/it]

Episode 1201/2000, real env return = 5922.34


 61%|█████████████████████▏             | 1211/2000 [1:43:15<1:07:49,  5.16s/it]

Episode 1211/2000, real env return = 5700.69


 61%|█████████████████████▎             | 1221/2000 [1:44:06<1:05:48,  5.07s/it]

Episode 1221/2000, real env return = 5904.81


 62%|█████████████████████▌             | 1231/2000 [1:44:59<1:08:24,  5.34s/it]

Episode 1231/2000, real env return = 6020.71


 62%|█████████████████████▋             | 1241/2000 [1:45:52<1:05:32,  5.18s/it]

Episode 1241/2000, real env return = 5170.56


 63%|█████████████████████▉             | 1251/2000 [1:46:44<1:05:31,  5.25s/it]

Episode 1251/2000, real env return = 6147.66


 63%|██████████████████████             | 1261/2000 [1:47:35<1:04:08,  5.21s/it]

Episode 1261/2000, real env return = 6049.99


 64%|██████████████████████▏            | 1271/2000 [1:48:26<1:02:14,  5.12s/it]

Episode 1271/2000, real env return = 6033.41


 64%|██████████████████████▍            | 1281/2000 [1:49:18<1:01:11,  5.11s/it]

Episode 1281/2000, real env return = 5473.22


 65%|██████████████████████▌            | 1291/2000 [1:50:09<1:00:16,  5.10s/it]

Episode 1291/2000, real env return = 5778.97


 65%|████████████████████████             | 1301/2000 [1:51:00<59:39,  5.12s/it]

Episode 1301/2000, real env return = 5863.99


 66%|████████████████████████▎            | 1311/2000 [1:51:52<58:27,  5.09s/it]

Episode 1311/2000, real env return = 5040.34


 66%|████████████████████████▍            | 1321/2000 [1:52:43<57:58,  5.12s/it]

Episode 1321/2000, real env return = 5837.32


 67%|████████████████████████▌            | 1331/2000 [1:53:34<56:52,  5.10s/it]

Episode 1331/2000, real env return = 5390.65


 67%|████████████████████████▊            | 1341/2000 [1:54:25<56:29,  5.14s/it]

Episode 1341/2000, real env return = 5306.06


 68%|████████████████████████▉            | 1351/2000 [1:55:17<56:01,  5.18s/it]

Episode 1351/2000, real env return = 5590.35


 68%|█████████████████████████▏           | 1361/2000 [1:56:08<55:06,  5.17s/it]

Episode 1361/2000, real env return = 6244.26


 69%|█████████████████████████▎           | 1371/2000 [1:56:59<53:57,  5.15s/it]

Episode 1371/2000, real env return = 6180.02


 69%|█████████████████████████▌           | 1381/2000 [1:57:51<53:31,  5.19s/it]

Episode 1381/2000, real env return = 5420.58


 70%|█████████████████████████▋           | 1391/2000 [1:58:43<52:14,  5.15s/it]

Episode 1391/2000, real env return = 5817.88


 70%|█████████████████████████▉           | 1401/2000 [1:59:34<51:04,  5.12s/it]

Episode 1401/2000, real env return = 5784.17


 71%|██████████████████████████           | 1411/2000 [2:00:27<53:14,  5.42s/it]

Episode 1411/2000, real env return = 6118.28


 71%|██████████████████████████▎          | 1421/2000 [2:01:20<49:51,  5.17s/it]

Episode 1421/2000, real env return = 5794.41


 72%|██████████████████████████▍          | 1431/2000 [2:02:13<49:50,  5.25s/it]

Episode 1431/2000, real env return = 6183.55


 72%|██████████████████████████▋          | 1441/2000 [2:03:05<47:20,  5.08s/it]

Episode 1441/2000, real env return = 6056.50


 73%|██████████████████████████▊          | 1451/2000 [2:03:56<46:19,  5.06s/it]

Episode 1451/2000, real env return = 5925.84


 73%|███████████████████████████          | 1461/2000 [2:04:48<46:51,  5.22s/it]

Episode 1461/2000, real env return = 6142.22


 74%|███████████████████████████▏         | 1471/2000 [2:05:39<44:58,  5.10s/it]

Episode 1471/2000, real env return = 5712.28


 74%|███████████████████████████▍         | 1481/2000 [2:06:31<44:57,  5.20s/it]

Episode 1481/2000, real env return = 5835.09


 75%|███████████████████████████▌         | 1491/2000 [2:07:22<43:38,  5.14s/it]

Episode 1491/2000, real env return = 5711.51


 75%|███████████████████████████▊         | 1501/2000 [2:08:13<42:35,  5.12s/it]

Episode 1501/2000, real env return = 6342.42


 76%|███████████████████████████▉         | 1511/2000 [2:09:05<42:04,  5.16s/it]

Episode 1511/2000, real env return = 5871.59


 76%|████████████████████████████▏        | 1521/2000 [2:09:56<41:17,  5.17s/it]

Episode 1521/2000, real env return = 5888.31


 77%|████████████████████████████▎        | 1531/2000 [2:10:48<40:04,  5.13s/it]

Episode 1531/2000, real env return = 6449.03


 77%|████████████████████████████▌        | 1541/2000 [2:11:40<39:27,  5.16s/it]

Episode 1541/2000, real env return = 6185.88


 78%|████████████████████████████▋        | 1551/2000 [2:12:31<38:04,  5.09s/it]

Episode 1551/2000, real env return = 6416.01


 78%|████████████████████████████▉        | 1561/2000 [2:13:22<37:21,  5.11s/it]

Episode 1561/2000, real env return = 6582.41


 79%|█████████████████████████████        | 1571/2000 [2:14:14<36:31,  5.11s/it]

Episode 1571/2000, real env return = 6408.12


 79%|█████████████████████████████▏       | 1581/2000 [2:15:05<35:52,  5.14s/it]

Episode 1581/2000, real env return = 5856.48


 80%|█████████████████████████████▍       | 1591/2000 [2:15:56<34:42,  5.09s/it]

Episode 1591/2000, real env return = 6079.04


 80%|█████████████████████████████▌       | 1601/2000 [2:16:48<34:19,  5.16s/it]

Episode 1601/2000, real env return = 6580.67


 81%|█████████████████████████████▊       | 1611/2000 [2:17:39<33:20,  5.14s/it]

Episode 1611/2000, real env return = 6320.74


 81%|█████████████████████████████▉       | 1621/2000 [2:18:31<32:51,  5.20s/it]

Episode 1621/2000, real env return = 6461.53


 82%|██████████████████████████████▏      | 1631/2000 [2:19:22<31:44,  5.16s/it]

Episode 1631/2000, real env return = 6338.44


 82%|██████████████████████████████▎      | 1641/2000 [2:20:13<30:47,  5.15s/it]

Episode 1641/2000, real env return = 6223.18


 83%|██████████████████████████████▌      | 1651/2000 [2:21:05<29:55,  5.14s/it]

Episode 1651/2000, real env return = 6131.50


 83%|██████████████████████████████▋      | 1661/2000 [2:21:57<28:57,  5.13s/it]

Episode 1661/2000, real env return = 6247.08


 84%|██████████████████████████████▉      | 1671/2000 [2:22:48<28:03,  5.12s/it]

Episode 1671/2000, real env return = 6390.98


 84%|███████████████████████████████      | 1681/2000 [2:23:39<27:06,  5.10s/it]

Episode 1681/2000, real env return = 6118.76


 85%|███████████████████████████████▎     | 1691/2000 [2:24:31<26:20,  5.12s/it]

Episode 1691/2000, real env return = 6263.67


 85%|███████████████████████████████▍     | 1701/2000 [2:25:22<25:37,  5.14s/it]

Episode 1701/2000, real env return = 6011.76


 86%|███████████████████████████████▋     | 1711/2000 [2:26:14<24:45,  5.14s/it]

Episode 1711/2000, real env return = 6207.93


 86%|███████████████████████████████▊     | 1721/2000 [2:27:05<23:46,  5.11s/it]

Episode 1721/2000, real env return = 6843.56


 87%|████████████████████████████████     | 1731/2000 [2:27:56<22:52,  5.10s/it]

Episode 1731/2000, real env return = 6073.75


 87%|████████████████████████████████▏    | 1741/2000 [2:28:47<22:21,  5.18s/it]

Episode 1741/2000, real env return = 6479.05


 88%|████████████████████████████████▍    | 1751/2000 [2:29:39<21:27,  5.17s/it]

Episode 1751/2000, real env return = 6468.53


 88%|████████████████████████████████▌    | 1761/2000 [2:30:30<20:45,  5.21s/it]

Episode 1761/2000, real env return = 6871.21


 89%|████████████████████████████████▊    | 1771/2000 [2:31:22<19:35,  5.13s/it]

Episode 1771/2000, real env return = 6729.56


 89%|████████████████████████████████▉    | 1781/2000 [2:32:13<18:40,  5.12s/it]

Episode 1781/2000, real env return = 6564.15


 90%|█████████████████████████████████▏   | 1791/2000 [2:33:04<17:45,  5.10s/it]

Episode 1791/2000, real env return = 6634.41


 90%|█████████████████████████████████▎   | 1801/2000 [2:33:56<16:56,  5.11s/it]

Episode 1801/2000, real env return = 6656.83


 91%|█████████████████████████████████▌   | 1811/2000 [2:34:47<16:03,  5.10s/it]

Episode 1811/2000, real env return = 6028.06


 91%|█████████████████████████████████▋   | 1821/2000 [2:35:39<15:25,  5.17s/it]

Episode 1821/2000, real env return = 6780.00


 92%|█████████████████████████████████▊   | 1831/2000 [2:36:30<14:39,  5.20s/it]

Episode 1831/2000, real env return = 6609.39


 92%|██████████████████████████████████   | 1841/2000 [2:37:21<13:38,  5.15s/it]

Episode 1841/2000, real env return = 6794.88


 93%|██████████████████████████████████▏  | 1851/2000 [2:38:13<12:48,  5.16s/it]

Episode 1851/2000, real env return = 6451.91


 93%|██████████████████████████████████▍  | 1861/2000 [2:39:04<11:56,  5.15s/it]

Episode 1861/2000, real env return = 5056.31


 94%|██████████████████████████████████▌  | 1871/2000 [2:39:56<11:13,  5.22s/it]

Episode 1871/2000, real env return = 6514.62


 94%|██████████████████████████████████▊  | 1881/2000 [2:40:47<10:08,  5.11s/it]

Episode 1881/2000, real env return = 6710.32


 95%|██████████████████████████████████▉  | 1891/2000 [2:41:39<09:21,  5.15s/it]

Episode 1891/2000, real env return = 6674.11


 95%|███████████████████████████████████▏ | 1901/2000 [2:42:30<08:29,  5.14s/it]

Episode 1901/2000, real env return = 6845.82


 96%|███████████████████████████████████▎ | 1911/2000 [2:43:22<07:36,  5.13s/it]

Episode 1911/2000, real env return = 6534.43


 96%|███████████████████████████████████▌ | 1921/2000 [2:44:14<06:47,  5.16s/it]

Episode 1921/2000, real env return = 6847.98


 97%|███████████████████████████████████▋ | 1931/2000 [2:45:05<05:51,  5.10s/it]

Episode 1931/2000, real env return = 6208.80


 97%|███████████████████████████████████▉ | 1941/2000 [2:45:57<05:01,  5.11s/it]

Episode 1941/2000, real env return = 214.21


 98%|████████████████████████████████████ | 1951/2000 [2:46:48<04:09,  5.10s/it]

Episode 1951/2000, real env return = 6750.11


 98%|████████████████████████████████████▎| 1961/2000 [2:47:40<03:20,  5.15s/it]

Episode 1961/2000, real env return = 6654.26


 99%|████████████████████████████████████▍| 1971/2000 [2:48:31<02:29,  5.16s/it]

Episode 1971/2000, real env return = 6910.10


 99%|████████████████████████████████████▋| 1981/2000 [2:49:22<01:37,  5.14s/it]

Episode 1981/2000, real env return = 6957.41


100%|████████████████████████████████████▊| 1991/2000 [2:50:14<00:46,  5.16s/it]

Episode 1991/2000, real env return = 6821.58


100%|█████████████████████████████████████| 2000/2000 [2:51:00<00:00,  5.13s/it]


Training finished.


In [3]:
train2 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<23:29,  1.42it/s]

Episode 1/2000, real env return = -258.37


  1%|▏                                      | 11/2000 [00:25<2:17:02,  4.13s/it]

Episode 11/2000, real env return = -195.90


  1%|▍                                      | 21/2000 [01:16<2:46:28,  5.05s/it]

Episode 21/2000, real env return = -524.57


  2%|▌                                      | 31/2000 [02:07<2:47:41,  5.11s/it]

Episode 31/2000, real env return = -265.07


  2%|▊                                      | 41/2000 [02:58<2:46:26,  5.10s/it]

Episode 41/2000, real env return = -605.34


  3%|▉                                      | 51/2000 [03:48<2:45:00,  5.08s/it]

Episode 51/2000, real env return = -275.39


  3%|█▏                                     | 61/2000 [04:39<2:45:43,  5.13s/it]

Episode 61/2000, real env return = -232.95


  4%|█▍                                     | 71/2000 [05:30<2:44:24,  5.11s/it]

Episode 71/2000, real env return = -198.28


  4%|█▌                                     | 81/2000 [06:21<2:44:11,  5.13s/it]

Episode 81/2000, real env return = -242.66


  5%|█▊                                     | 91/2000 [07:12<2:42:35,  5.11s/it]

Episode 91/2000, real env return = -233.75


  5%|█▉                                    | 101/2000 [08:03<2:41:53,  5.12s/it]

Episode 101/2000, real env return = -206.78


  6%|██                                    | 111/2000 [08:54<2:41:04,  5.12s/it]

Episode 111/2000, real env return = -375.64


  6%|██▎                                   | 121/2000 [09:45<2:40:59,  5.14s/it]

Episode 121/2000, real env return = -357.90


  7%|██▍                                   | 131/2000 [10:37<2:39:30,  5.12s/it]

Episode 131/2000, real env return = -314.31


  7%|██▋                                   | 141/2000 [11:28<2:40:42,  5.19s/it]

Episode 141/2000, real env return = -291.60


  8%|██▊                                   | 151/2000 [12:20<2:40:53,  5.22s/it]

Episode 151/2000, real env return = -350.37


  8%|███                                   | 161/2000 [13:13<2:38:59,  5.19s/it]

Episode 161/2000, real env return = -291.07


  9%|███▏                                  | 171/2000 [14:04<2:36:20,  5.13s/it]

Episode 171/2000, real env return = -373.97


  9%|███▍                                  | 181/2000 [14:56<2:37:34,  5.20s/it]

Episode 181/2000, real env return = -311.96


 10%|███▋                                  | 191/2000 [15:48<2:36:50,  5.20s/it]

Episode 191/2000, real env return = -248.01


 10%|███▊                                  | 201/2000 [16:40<2:35:49,  5.20s/it]

Episode 201/2000, real env return = -338.35


 11%|████                                  | 211/2000 [17:32<2:33:50,  5.16s/it]

Episode 211/2000, real env return = -315.72


 11%|████▏                                 | 221/2000 [18:23<2:32:24,  5.14s/it]

Episode 221/2000, real env return = -245.93


 12%|████▍                                 | 231/2000 [19:15<2:32:29,  5.17s/it]

Episode 231/2000, real env return = -212.72


 12%|████▌                                 | 241/2000 [20:07<2:33:27,  5.23s/it]

Episode 241/2000, real env return = -305.51


 13%|████▊                                 | 251/2000 [21:00<2:31:25,  5.19s/it]

Episode 251/2000, real env return = -276.42


 13%|████▉                                 | 261/2000 [21:52<2:32:18,  5.26s/it]

Episode 261/2000, real env return = -279.30


 14%|█████▏                                | 271/2000 [22:44<2:31:48,  5.27s/it]

Episode 271/2000, real env return = -314.60


 14%|█████▎                                | 281/2000 [23:37<2:28:51,  5.20s/it]

Episode 281/2000, real env return = -178.81


 15%|█████▌                                | 291/2000 [24:29<2:29:42,  5.26s/it]

Episode 291/2000, real env return = -203.53


 15%|█████▋                                | 301/2000 [25:22<2:28:56,  5.26s/it]

Episode 301/2000, real env return = -284.81


 16%|█████▉                                | 311/2000 [26:14<2:26:02,  5.19s/it]

Episode 311/2000, real env return = -199.81


 16%|██████                                | 321/2000 [27:06<2:25:29,  5.20s/it]

Episode 321/2000, real env return = -304.84


 17%|██████▎                               | 331/2000 [27:58<2:24:17,  5.19s/it]

Episode 331/2000, real env return = -218.64


 17%|██████▍                               | 341/2000 [28:51<2:24:31,  5.23s/it]

Episode 341/2000, real env return = -166.68


 18%|██████▋                               | 351/2000 [29:43<2:23:18,  5.21s/it]

Episode 351/2000, real env return = -159.58


 18%|██████▊                               | 361/2000 [30:36<2:23:49,  5.27s/it]

Episode 361/2000, real env return = -164.97


 19%|███████                               | 371/2000 [31:29<2:23:21,  5.28s/it]

Episode 371/2000, real env return = -185.36


 19%|███████▏                              | 381/2000 [32:21<2:20:44,  5.22s/it]

Episode 381/2000, real env return = -192.13


 20%|███████▍                              | 391/2000 [33:13<2:18:42,  5.17s/it]

Episode 391/2000, real env return = -178.56


 20%|███████▌                              | 401/2000 [34:06<2:19:22,  5.23s/it]

Episode 401/2000, real env return = -167.58


 21%|███████▊                              | 411/2000 [34:58<2:18:16,  5.22s/it]

Episode 411/2000, real env return = -96.18


 21%|███████▉                              | 421/2000 [35:50<2:16:45,  5.20s/it]

Episode 421/2000, real env return = -100.53


 22%|████████▏                             | 431/2000 [36:43<2:16:17,  5.21s/it]

Episode 431/2000, real env return = -190.46


 22%|████████▍                             | 441/2000 [37:36<2:16:55,  5.27s/it]

Episode 441/2000, real env return = -225.65


 23%|████████▌                             | 451/2000 [38:28<2:13:49,  5.18s/it]

Episode 451/2000, real env return = -157.35


 23%|████████▊                             | 461/2000 [39:20<2:13:01,  5.19s/it]

Episode 461/2000, real env return = -193.09


 24%|████████▉                             | 471/2000 [40:13<2:13:04,  5.22s/it]

Episode 471/2000, real env return = -149.23


 24%|█████████▏                            | 481/2000 [41:05<2:11:36,  5.20s/it]

Episode 481/2000, real env return = -158.53


 25%|█████████▎                            | 491/2000 [41:57<2:11:01,  5.21s/it]

Episode 491/2000, real env return = -66.22


 25%|█████████▌                            | 501/2000 [42:50<2:09:00,  5.16s/it]

Episode 501/2000, real env return = -186.99


 26%|█████████▋                            | 511/2000 [43:42<2:09:43,  5.23s/it]

Episode 511/2000, real env return = -166.71


 26%|█████████▉                            | 521/2000 [44:35<2:10:36,  5.30s/it]

Episode 521/2000, real env return = -193.20


 27%|██████████                            | 531/2000 [45:28<2:08:14,  5.24s/it]

Episode 531/2000, real env return = -226.88


 27%|██████████▎                           | 541/2000 [46:20<2:07:52,  5.26s/it]

Episode 541/2000, real env return = -198.18


 28%|██████████▍                           | 551/2000 [47:13<2:05:36,  5.20s/it]

Episode 551/2000, real env return = -182.48


 28%|██████████▋                           | 561/2000 [48:05<2:06:21,  5.27s/it]

Episode 561/2000, real env return = -48.72


 29%|██████████▊                           | 571/2000 [48:58<2:04:54,  5.24s/it]

Episode 571/2000, real env return = -242.74


 29%|███████████                           | 581/2000 [49:51<2:04:13,  5.25s/it]

Episode 581/2000, real env return = -305.36


 30%|███████████▏                          | 591/2000 [50:44<2:02:43,  5.23s/it]

Episode 591/2000, real env return = -218.98


 30%|███████████▍                          | 601/2000 [51:37<2:03:43,  5.31s/it]

Episode 601/2000, real env return = -90.33


 31%|███████████▌                          | 611/2000 [52:29<2:01:23,  5.24s/it]

Episode 611/2000, real env return = 251.54


 31%|███████████▊                          | 621/2000 [53:22<2:00:38,  5.25s/it]

Episode 621/2000, real env return = 514.66


 32%|███████████▉                          | 631/2000 [54:14<1:58:28,  5.19s/it]

Episode 631/2000, real env return = 45.45


 32%|████████████▏                         | 641/2000 [55:06<1:57:31,  5.19s/it]

Episode 641/2000, real env return = -202.80


 33%|████████████▎                         | 651/2000 [55:59<1:57:06,  5.21s/it]

Episode 651/2000, real env return = -78.07


 33%|████████████▌                         | 661/2000 [56:51<1:55:00,  5.15s/it]

Episode 661/2000, real env return = 106.12


 34%|████████████▋                         | 671/2000 [57:43<1:55:30,  5.21s/it]

Episode 671/2000, real env return = 248.22


 34%|████████████▉                         | 681/2000 [58:36<1:54:57,  5.23s/it]

Episode 681/2000, real env return = 381.16


 35%|█████████████▏                        | 691/2000 [59:29<1:53:43,  5.21s/it]

Episode 691/2000, real env return = 466.64


 35%|████████████▌                       | 701/2000 [1:00:22<1:54:01,  5.27s/it]

Episode 701/2000, real env return = 476.89


 36%|████████████▊                       | 711/2000 [1:01:14<1:53:23,  5.28s/it]

Episode 711/2000, real env return = 787.80


 36%|████████████▉                       | 721/2000 [1:02:07<1:49:57,  5.16s/it]

Episode 721/2000, real env return = 919.88


 37%|█████████████▏                      | 731/2000 [1:03:00<1:51:02,  5.25s/it]

Episode 731/2000, real env return = 1067.17


 37%|█████████████▎                      | 741/2000 [1:03:52<1:49:59,  5.24s/it]

Episode 741/2000, real env return = 1096.28


 38%|█████████████▌                      | 751/2000 [1:04:45<1:49:24,  5.26s/it]

Episode 751/2000, real env return = 1140.25


 38%|█████████████▋                      | 761/2000 [1:05:37<1:48:39,  5.26s/it]

Episode 761/2000, real env return = 963.71


 39%|█████████████▉                      | 771/2000 [1:06:30<1:48:50,  5.31s/it]

Episode 771/2000, real env return = 1056.89


 39%|██████████████                      | 781/2000 [1:07:22<1:46:08,  5.22s/it]

Episode 781/2000, real env return = 856.47


 40%|██████████████▏                     | 791/2000 [1:08:15<1:46:16,  5.27s/it]

Episode 791/2000, real env return = 690.88


 40%|██████████████▍                     | 801/2000 [1:09:07<1:44:29,  5.23s/it]

Episode 801/2000, real env return = 849.70


 41%|██████████████▌                     | 811/2000 [1:10:00<1:44:18,  5.26s/it]

Episode 811/2000, real env return = 739.76


 41%|██████████████▊                     | 821/2000 [1:10:53<1:44:07,  5.30s/it]

Episode 821/2000, real env return = 851.91


 42%|██████████████▉                     | 831/2000 [1:11:45<1:41:22,  5.20s/it]

Episode 831/2000, real env return = 871.20


 42%|███████████████▏                    | 841/2000 [1:12:38<1:41:56,  5.28s/it]

Episode 841/2000, real env return = 899.32


 43%|███████████████▎                    | 851/2000 [1:13:30<1:41:12,  5.29s/it]

Episode 851/2000, real env return = 900.18


 43%|███████████████▍                    | 861/2000 [1:14:23<1:39:26,  5.24s/it]

Episode 861/2000, real env return = 892.04


 44%|███████████████▋                    | 871/2000 [1:15:15<1:38:26,  5.23s/it]

Episode 871/2000, real env return = 1017.15


 44%|███████████████▊                    | 881/2000 [1:16:08<1:38:21,  5.27s/it]

Episode 881/2000, real env return = 1008.11


 45%|████████████████                    | 891/2000 [1:17:02<1:40:57,  5.46s/it]

Episode 891/2000, real env return = 1004.71


 45%|████████████████▏                   | 901/2000 [1:17:53<1:34:54,  5.18s/it]

Episode 901/2000, real env return = 972.28


 46%|████████████████▍                   | 911/2000 [1:18:45<1:33:44,  5.16s/it]

Episode 911/2000, real env return = 1048.82


 46%|████████████████▌                   | 921/2000 [1:19:36<1:32:21,  5.14s/it]

Episode 921/2000, real env return = 1089.89


 47%|████████████████▊                   | 931/2000 [1:20:28<1:31:09,  5.12s/it]

Episode 931/2000, real env return = 1089.94


 47%|████████████████▉                   | 941/2000 [1:21:19<1:30:40,  5.14s/it]

Episode 941/2000, real env return = 1121.49


 48%|█████████████████                   | 951/2000 [1:22:10<1:29:02,  5.09s/it]

Episode 951/2000, real env return = 1109.50


 48%|█████████████████▎                  | 961/2000 [1:23:01<1:28:01,  5.08s/it]

Episode 961/2000, real env return = 1051.96


 49%|█████████████████▍                  | 971/2000 [1:23:53<1:28:21,  5.15s/it]

Episode 971/2000, real env return = 1110.07


 49%|█████████████████▋                  | 981/2000 [1:24:44<1:27:37,  5.16s/it]

Episode 981/2000, real env return = 1162.60


 50%|█████████████████▊                  | 991/2000 [1:25:36<1:26:29,  5.14s/it]

Episode 991/2000, real env return = 1088.68


 50%|█████████████████▌                 | 1001/2000 [1:26:27<1:25:54,  5.16s/it]

Episode 1001/2000, real env return = 1125.64


 51%|█████████████████▋                 | 1011/2000 [1:27:18<1:24:22,  5.12s/it]

Episode 1011/2000, real env return = 1105.65


 51%|█████████████████▊                 | 1021/2000 [1:28:09<1:23:11,  5.10s/it]

Episode 1021/2000, real env return = 1045.57


 52%|██████████████████                 | 1031/2000 [1:29:01<1:23:23,  5.16s/it]

Episode 1031/2000, real env return = 1074.74


 52%|██████████████████▏                | 1041/2000 [1:29:52<1:21:59,  5.13s/it]

Episode 1041/2000, real env return = 1020.12


 53%|██████████████████▍                | 1051/2000 [1:30:43<1:19:55,  5.05s/it]

Episode 1051/2000, real env return = 1079.72


 53%|██████████████████▌                | 1061/2000 [1:31:34<1:20:21,  5.13s/it]

Episode 1061/2000, real env return = 1055.41


 54%|██████████████████▋                | 1071/2000 [1:32:26<1:18:58,  5.10s/it]

Episode 1071/2000, real env return = 1042.40


 54%|██████████████████▉                | 1081/2000 [1:33:17<1:18:08,  5.10s/it]

Episode 1081/2000, real env return = 1084.81


 55%|███████████████████                | 1091/2000 [1:34:09<1:19:10,  5.23s/it]

Episode 1091/2000, real env return = 1133.34


 55%|███████████████████▎               | 1101/2000 [1:35:00<1:16:57,  5.14s/it]

Episode 1101/2000, real env return = 1179.63


 56%|███████████████████▍               | 1111/2000 [1:35:51<1:16:25,  5.16s/it]

Episode 1111/2000, real env return = 1086.37


 56%|███████████████████▌               | 1121/2000 [1:36:42<1:16:53,  5.25s/it]

Episode 1121/2000, real env return = 1071.88


 57%|███████████████████▊               | 1131/2000 [1:37:34<1:15:39,  5.22s/it]

Episode 1131/2000, real env return = 1155.06


 57%|███████████████████▉               | 1141/2000 [1:38:26<1:14:04,  5.17s/it]

Episode 1141/2000, real env return = 1064.43


 58%|████████████████████▏              | 1151/2000 [1:39:17<1:12:52,  5.15s/it]

Episode 1151/2000, real env return = 1091.67


 58%|████████████████████▎              | 1161/2000 [1:40:09<1:11:38,  5.12s/it]

Episode 1161/2000, real env return = 1093.15


 59%|████████████████████▍              | 1171/2000 [1:41:00<1:11:08,  5.15s/it]

Episode 1171/2000, real env return = 1125.62


 59%|████████████████████▋              | 1181/2000 [1:41:51<1:09:30,  5.09s/it]

Episode 1181/2000, real env return = 1124.83


 60%|████████████████████▊              | 1191/2000 [1:42:43<1:09:38,  5.17s/it]

Episode 1191/2000, real env return = 1027.68


 60%|█████████████████████              | 1201/2000 [1:43:35<1:08:17,  5.13s/it]

Episode 1201/2000, real env return = 1131.80


 61%|█████████████████████▏             | 1211/2000 [1:44:27<1:08:08,  5.18s/it]

Episode 1211/2000, real env return = 1094.42


 61%|█████████████████████▎             | 1221/2000 [1:45:18<1:06:22,  5.11s/it]

Episode 1221/2000, real env return = 1119.84


 62%|█████████████████████▌             | 1231/2000 [1:46:10<1:05:30,  5.11s/it]

Episode 1231/2000, real env return = 1087.67


 62%|█████████████████████▋             | 1241/2000 [1:47:01<1:04:45,  5.12s/it]

Episode 1241/2000, real env return = 1126.72


 63%|█████████████████████▉             | 1251/2000 [1:47:52<1:04:59,  5.21s/it]

Episode 1251/2000, real env return = 1132.42


 63%|██████████████████████             | 1261/2000 [1:48:46<1:06:24,  5.39s/it]

Episode 1261/2000, real env return = 1142.08


 64%|██████████████████████▏            | 1271/2000 [1:49:38<1:03:50,  5.25s/it]

Episode 1271/2000, real env return = 1176.44


 64%|██████████████████████▍            | 1281/2000 [1:50:30<1:02:43,  5.23s/it]

Episode 1281/2000, real env return = 1186.45


 65%|██████████████████████▌            | 1291/2000 [1:51:23<1:02:51,  5.32s/it]

Episode 1291/2000, real env return = 1100.93


 65%|██████████████████████▊            | 1301/2000 [1:52:16<1:01:20,  5.26s/it]

Episode 1301/2000, real env return = 1153.75


 66%|██████████████████████▉            | 1311/2000 [1:53:08<1:00:31,  5.27s/it]

Episode 1311/2000, real env return = 1110.18


 66%|████████████████████████▍            | 1321/2000 [1:54:01<59:41,  5.27s/it]

Episode 1321/2000, real env return = 1085.64


 67%|████████████████████████▌            | 1331/2000 [1:54:54<59:03,  5.30s/it]

Episode 1331/2000, real env return = 1146.16


 67%|████████████████████████▊            | 1341/2000 [1:55:46<58:05,  5.29s/it]

Episode 1341/2000, real env return = 1137.81


 68%|████████████████████████▉            | 1351/2000 [1:56:38<57:27,  5.31s/it]

Episode 1351/2000, real env return = 1176.49


 68%|█████████████████████████▏           | 1361/2000 [1:57:31<56:01,  5.26s/it]

Episode 1361/2000, real env return = 1159.05


 69%|█████████████████████████▎           | 1371/2000 [1:58:23<54:50,  5.23s/it]

Episode 1371/2000, real env return = 1157.89


 69%|█████████████████████████▌           | 1381/2000 [1:59:16<54:14,  5.26s/it]

Episode 1381/2000, real env return = 1178.81


 70%|█████████████████████████▋           | 1391/2000 [2:00:08<52:42,  5.19s/it]

Episode 1391/2000, real env return = 1183.63


 70%|█████████████████████████▉           | 1401/2000 [2:01:01<52:25,  5.25s/it]

Episode 1401/2000, real env return = 1099.56


 71%|██████████████████████████           | 1411/2000 [2:01:53<50:58,  5.19s/it]

Episode 1411/2000, real env return = 1176.27


 71%|██████████████████████████▎          | 1421/2000 [2:02:46<50:26,  5.23s/it]

Episode 1421/2000, real env return = 1127.84


 72%|██████████████████████████▍          | 1431/2000 [2:03:39<50:17,  5.30s/it]

Episode 1431/2000, real env return = 1113.44


 72%|██████████████████████████▋          | 1441/2000 [2:04:31<49:06,  5.27s/it]

Episode 1441/2000, real env return = 1023.29


 73%|██████████████████████████▊          | 1451/2000 [2:05:23<47:12,  5.16s/it]

Episode 1451/2000, real env return = 1130.97


 73%|███████████████████████████          | 1461/2000 [2:06:16<47:20,  5.27s/it]

Episode 1461/2000, real env return = 1063.04


 74%|███████████████████████████▏         | 1471/2000 [2:07:08<46:11,  5.24s/it]

Episode 1471/2000, real env return = 1140.12


 74%|███████████████████████████▍         | 1481/2000 [2:08:01<45:23,  5.25s/it]

Episode 1481/2000, real env return = 1050.56


 75%|███████████████████████████▌         | 1491/2000 [2:08:54<44:15,  5.22s/it]

Episode 1491/2000, real env return = 1105.33


 75%|███████████████████████████▊         | 1501/2000 [2:09:46<42:54,  5.16s/it]

Episode 1501/2000, real env return = 1188.31


 76%|███████████████████████████▉         | 1511/2000 [2:10:39<42:47,  5.25s/it]

Episode 1511/2000, real env return = 1208.17


 76%|████████████████████████████▏        | 1521/2000 [2:11:31<41:25,  5.19s/it]

Episode 1521/2000, real env return = 1212.48


 77%|████████████████████████████▎        | 1531/2000 [2:12:24<40:47,  5.22s/it]

Episode 1531/2000, real env return = 1132.35


 77%|████████████████████████████▌        | 1541/2000 [2:13:16<39:58,  5.23s/it]

Episode 1541/2000, real env return = 1212.64


 78%|████████████████████████████▋        | 1551/2000 [2:14:09<39:13,  5.24s/it]

Episode 1551/2000, real env return = 1271.69


 78%|████████████████████████████▉        | 1561/2000 [2:15:02<38:53,  5.31s/it]

Episode 1561/2000, real env return = 1392.82


 79%|█████████████████████████████        | 1571/2000 [2:15:55<37:58,  5.31s/it]

Episode 1571/2000, real env return = 1300.11


 79%|█████████████████████████████▏       | 1581/2000 [2:16:47<36:47,  5.27s/it]

Episode 1581/2000, real env return = 1223.51


 80%|█████████████████████████████▍       | 1591/2000 [2:17:40<35:38,  5.23s/it]

Episode 1591/2000, real env return = 1323.16


 80%|█████████████████████████████▌       | 1601/2000 [2:18:33<35:06,  5.28s/it]

Episode 1601/2000, real env return = 1290.22


 81%|█████████████████████████████▊       | 1611/2000 [2:19:25<33:54,  5.23s/it]

Episode 1611/2000, real env return = 1329.84


 81%|█████████████████████████████▉       | 1621/2000 [2:20:18<33:27,  5.30s/it]

Episode 1621/2000, real env return = 1346.57


 82%|██████████████████████████████▏      | 1631/2000 [2:21:10<32:12,  5.24s/it]

Episode 1631/2000, real env return = 1285.58


 82%|██████████████████████████████▎      | 1641/2000 [2:22:04<33:51,  5.66s/it]

Episode 1641/2000, real env return = 1274.87


 83%|██████████████████████████████▌      | 1651/2000 [2:22:58<30:57,  5.32s/it]

Episode 1651/2000, real env return = 1337.91


 83%|██████████████████████████████▋      | 1661/2000 [2:23:50<29:37,  5.24s/it]

Episode 1661/2000, real env return = 1365.39


 84%|██████████████████████████████▉      | 1671/2000 [2:24:42<28:20,  5.17s/it]

Episode 1671/2000, real env return = 1402.11


 84%|███████████████████████████████      | 1681/2000 [2:25:33<27:39,  5.20s/it]

Episode 1681/2000, real env return = 1340.86


 85%|███████████████████████████████▎     | 1691/2000 [2:26:25<26:57,  5.23s/it]

Episode 1691/2000, real env return = 1350.07


 85%|███████████████████████████████▍     | 1701/2000 [2:27:17<26:00,  5.22s/it]

Episode 1701/2000, real env return = 1355.46


 86%|███████████████████████████████▋     | 1711/2000 [2:28:09<25:01,  5.20s/it]

Episode 1711/2000, real env return = 1348.80


 86%|███████████████████████████████▊     | 1721/2000 [2:29:02<24:13,  5.21s/it]

Episode 1721/2000, real env return = 1340.98


 87%|████████████████████████████████     | 1731/2000 [2:29:54<23:33,  5.26s/it]

Episode 1731/2000, real env return = 1338.77


 87%|████████████████████████████████▏    | 1741/2000 [2:30:46<22:12,  5.14s/it]

Episode 1741/2000, real env return = 1502.87


 88%|████████████████████████████████▍    | 1751/2000 [2:31:38<21:39,  5.22s/it]

Episode 1751/2000, real env return = 1431.61


 88%|████████████████████████████████▌    | 1761/2000 [2:32:29<20:22,  5.11s/it]

Episode 1761/2000, real env return = 1434.51


 89%|████████████████████████████████▊    | 1771/2000 [2:33:20<19:35,  5.13s/it]

Episode 1771/2000, real env return = 1349.76


 89%|████████████████████████████████▉    | 1781/2000 [2:34:11<18:44,  5.13s/it]

Episode 1781/2000, real env return = 1572.14


 90%|█████████████████████████████████▏   | 1791/2000 [2:35:03<17:50,  5.12s/it]

Episode 1791/2000, real env return = 1552.48


 90%|█████████████████████████████████▎   | 1801/2000 [2:35:54<17:01,  5.14s/it]

Episode 1801/2000, real env return = 1484.61


 91%|█████████████████████████████████▌   | 1811/2000 [2:36:45<16:18,  5.18s/it]

Episode 1811/2000, real env return = 1545.88


 91%|█████████████████████████████████▋   | 1821/2000 [2:37:37<15:33,  5.22s/it]

Episode 1821/2000, real env return = 1463.48


 92%|█████████████████████████████████▊   | 1831/2000 [2:38:29<14:35,  5.18s/it]

Episode 1831/2000, real env return = 1537.05


 92%|██████████████████████████████████   | 1841/2000 [2:39:21<13:37,  5.14s/it]

Episode 1841/2000, real env return = 1538.97


 93%|██████████████████████████████████▏  | 1851/2000 [2:40:12<12:49,  5.16s/it]

Episode 1851/2000, real env return = 1571.31


 93%|██████████████████████████████████▍  | 1861/2000 [2:41:04<11:54,  5.14s/it]

Episode 1861/2000, real env return = 1664.07


 94%|██████████████████████████████████▌  | 1871/2000 [2:41:55<11:01,  5.13s/it]

Episode 1871/2000, real env return = 1597.99


 94%|██████████████████████████████████▊  | 1881/2000 [2:42:46<10:08,  5.12s/it]

Episode 1881/2000, real env return = 1636.21


 95%|██████████████████████████████████▉  | 1891/2000 [2:43:38<09:17,  5.12s/it]

Episode 1891/2000, real env return = 1650.63


 95%|███████████████████████████████████▏ | 1901/2000 [2:44:30<08:32,  5.17s/it]

Episode 1901/2000, real env return = 1746.45


 96%|███████████████████████████████████▎ | 1911/2000 [2:45:22<07:49,  5.27s/it]

Episode 1911/2000, real env return = 1733.46


 96%|███████████████████████████████████▌ | 1921/2000 [2:46:14<06:48,  5.17s/it]

Episode 1921/2000, real env return = 1772.02


 97%|███████████████████████████████████▋ | 1931/2000 [2:47:05<05:56,  5.16s/it]

Episode 1931/2000, real env return = 1804.40


 97%|███████████████████████████████████▉ | 1941/2000 [2:47:57<05:03,  5.14s/it]

Episode 1941/2000, real env return = 1772.75


 98%|████████████████████████████████████ | 1951/2000 [2:48:49<04:14,  5.20s/it]

Episode 1951/2000, real env return = 1748.98


 98%|████████████████████████████████████▎| 1961/2000 [2:49:40<03:22,  5.19s/it]

Episode 1961/2000, real env return = 1719.71


 99%|████████████████████████████████████▍| 1971/2000 [2:50:32<02:31,  5.21s/it]

Episode 1971/2000, real env return = 1758.73


 99%|████████████████████████████████████▋| 1981/2000 [2:51:24<01:38,  5.18s/it]

Episode 1981/2000, real env return = 1742.11


100%|████████████████████████████████████▊| 1991/2000 [2:52:15<00:46,  5.18s/it]

Episode 1991/2000, real env return = 1717.66


100%|█████████████████████████████████████| 2000/2000 [2:53:02<00:00,  5.19s/it]


Training finished.


In [4]:
train3 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<10:32,  3.16it/s]

Episode 1/2000, real env return = -274.56


  1%|▏                                      | 11/2000 [00:16<1:29:54,  2.71s/it]

Episode 11/2000, real env return = -319.37


  1%|▍                                      | 21/2000 [01:05<2:43:53,  4.97s/it]

Episode 21/2000, real env return = -467.89


  2%|▌                                      | 31/2000 [01:56<2:46:21,  5.07s/it]

Episode 31/2000, real env return = -116.64


  2%|▊                                      | 41/2000 [02:47<2:46:29,  5.10s/it]

Episode 41/2000, real env return = -372.62


  3%|▉                                      | 51/2000 [03:39<2:46:21,  5.12s/it]

Episode 51/2000, real env return = -475.22


  3%|█▏                                     | 61/2000 [04:30<2:44:06,  5.08s/it]

Episode 61/2000, real env return = -497.90


  4%|█▍                                     | 71/2000 [05:21<2:42:26,  5.05s/it]

Episode 71/2000, real env return = -481.87


  4%|█▌                                     | 81/2000 [06:11<2:42:11,  5.07s/it]

Episode 81/2000, real env return = -59.84


  5%|█▊                                     | 91/2000 [07:02<2:41:53,  5.09s/it]

Episode 91/2000, real env return = -471.90


  5%|█▉                                    | 101/2000 [07:53<2:41:29,  5.10s/it]

Episode 101/2000, real env return = -394.22


  6%|██                                    | 111/2000 [08:43<2:40:14,  5.09s/it]

Episode 111/2000, real env return = -347.53


  6%|██▎                                   | 121/2000 [09:34<2:38:41,  5.07s/it]

Episode 121/2000, real env return = -373.78


  7%|██▍                                   | 131/2000 [10:25<2:37:05,  5.04s/it]

Episode 131/2000, real env return = -341.63


  7%|██▋                                   | 141/2000 [11:16<2:38:09,  5.10s/it]

Episode 141/2000, real env return = -431.86


  8%|██▊                                   | 151/2000 [12:07<2:37:17,  5.10s/it]

Episode 151/2000, real env return = -463.24


  8%|███                                   | 161/2000 [12:58<2:36:53,  5.12s/it]

Episode 161/2000, real env return = -420.17


  9%|███▏                                  | 171/2000 [13:50<2:35:11,  5.09s/it]

Episode 171/2000, real env return = -366.52


  9%|███▍                                  | 181/2000 [14:41<2:34:26,  5.09s/it]

Episode 181/2000, real env return = -344.79


 10%|███▋                                  | 191/2000 [15:32<2:33:39,  5.10s/it]

Episode 191/2000, real env return = -404.54


 10%|███▊                                  | 201/2000 [16:26<2:49:42,  5.66s/it]

Episode 201/2000, real env return = -318.56


 11%|████                                  | 211/2000 [17:17<2:32:40,  5.12s/it]

Episode 211/2000, real env return = -363.02


 11%|████▏                                 | 221/2000 [18:09<2:31:37,  5.11s/it]

Episode 221/2000, real env return = -356.48


 12%|████▍                                 | 231/2000 [19:00<2:30:27,  5.10s/it]

Episode 231/2000, real env return = -360.99


 12%|████▌                                 | 241/2000 [19:51<2:29:35,  5.10s/it]

Episode 241/2000, real env return = -318.75


 13%|████▊                                 | 251/2000 [20:43<2:29:17,  5.12s/it]

Episode 251/2000, real env return = -178.42


 13%|████▉                                 | 261/2000 [21:35<2:28:28,  5.12s/it]

Episode 261/2000, real env return = -303.38


 14%|█████▏                                | 271/2000 [22:26<2:27:17,  5.11s/it]

Episode 271/2000, real env return = -226.21


 14%|█████▎                                | 281/2000 [23:17<2:27:21,  5.14s/it]

Episode 281/2000, real env return = -186.99


 15%|█████▌                                | 291/2000 [24:08<2:25:56,  5.12s/it]

Episode 291/2000, real env return = -252.81


 15%|█████▋                                | 301/2000 [24:59<2:25:23,  5.13s/it]

Episode 301/2000, real env return = -211.75


 16%|█████▉                                | 311/2000 [25:51<2:26:37,  5.21s/it]

Episode 311/2000, real env return = -200.18


 16%|██████                                | 321/2000 [26:42<2:23:48,  5.14s/it]

Episode 321/2000, real env return = -191.77


 17%|██████▎                               | 331/2000 [27:33<2:22:38,  5.13s/it]

Episode 331/2000, real env return = -233.09


 17%|██████▍                               | 341/2000 [28:25<2:23:05,  5.17s/it]

Episode 341/2000, real env return = -357.01


 18%|██████▋                               | 351/2000 [29:16<2:21:39,  5.15s/it]

Episode 351/2000, real env return = -233.28


 18%|██████▊                               | 361/2000 [30:08<2:22:23,  5.21s/it]

Episode 361/2000, real env return = -225.07


 19%|███████                               | 371/2000 [31:00<2:20:03,  5.16s/it]

Episode 371/2000, real env return = -66.31


 19%|███████▏                              | 381/2000 [31:51<2:17:41,  5.10s/it]

Episode 381/2000, real env return = 65.53


 20%|███████▍                              | 391/2000 [32:43<2:18:49,  5.18s/it]

Episode 391/2000, real env return = -45.91


 20%|███████▌                              | 401/2000 [33:34<2:15:48,  5.10s/it]

Episode 401/2000, real env return = -20.14


 21%|███████▊                              | 411/2000 [34:26<2:16:22,  5.15s/it]

Episode 411/2000, real env return = 271.68


 21%|███████▉                              | 421/2000 [35:17<2:16:14,  5.18s/it]

Episode 421/2000, real env return = 278.60


 22%|████████▏                             | 431/2000 [36:09<2:13:42,  5.11s/it]

Episode 431/2000, real env return = 537.21


 22%|████████▍                             | 441/2000 [37:01<2:14:22,  5.17s/it]

Episode 441/2000, real env return = -100.37


 23%|████████▌                             | 451/2000 [37:52<2:12:01,  5.11s/it]

Episode 451/2000, real env return = 57.93


 23%|████████▊                             | 461/2000 [38:44<2:12:41,  5.17s/it]

Episode 461/2000, real env return = 201.55


 24%|████████▉                             | 471/2000 [39:36<2:11:10,  5.15s/it]

Episode 471/2000, real env return = 1195.58


 24%|█████████▏                            | 481/2000 [40:27<2:10:18,  5.15s/it]

Episode 481/2000, real env return = 1735.43


 25%|█████████▎                            | 491/2000 [41:18<2:10:27,  5.19s/it]

Episode 491/2000, real env return = 1728.53


 25%|█████████▌                            | 501/2000 [42:10<2:08:44,  5.15s/it]

Episode 501/2000, real env return = 1474.56


 26%|█████████▋                            | 511/2000 [43:01<2:06:48,  5.11s/it]

Episode 511/2000, real env return = 1530.89


 26%|█████████▉                            | 521/2000 [43:52<2:06:00,  5.11s/it]

Episode 521/2000, real env return = 2053.85


 27%|██████████                            | 531/2000 [44:43<2:05:26,  5.12s/it]

Episode 531/2000, real env return = 2296.24


 27%|██████████▎                           | 541/2000 [45:35<2:06:43,  5.21s/it]

Episode 541/2000, real env return = 2313.84


 28%|██████████▍                           | 551/2000 [46:26<2:02:43,  5.08s/it]

Episode 551/2000, real env return = -77.17


 28%|██████████▋                           | 561/2000 [47:17<2:02:31,  5.11s/it]

Episode 561/2000, real env return = 580.00


 29%|██████████▊                           | 571/2000 [48:09<2:01:53,  5.12s/it]

Episode 571/2000, real env return = 2559.28


 29%|███████████                           | 581/2000 [49:00<2:01:08,  5.12s/it]

Episode 581/2000, real env return = 2877.20


 30%|███████████▏                          | 591/2000 [49:52<2:00:51,  5.15s/it]

Episode 591/2000, real env return = 1067.71


 30%|███████████▍                          | 601/2000 [50:43<1:59:39,  5.13s/it]

Episode 601/2000, real env return = 3103.39


 31%|███████████▌                          | 611/2000 [51:34<1:59:05,  5.14s/it]

Episode 611/2000, real env return = 2791.73


 31%|███████████▊                          | 621/2000 [52:26<1:59:21,  5.19s/it]

Episode 621/2000, real env return = 2812.60


 32%|███████████▉                          | 631/2000 [53:17<1:57:27,  5.15s/it]

Episode 631/2000, real env return = 3097.12


 32%|████████████▏                         | 641/2000 [54:08<1:55:40,  5.11s/it]

Episode 641/2000, real env return = 3082.06


 33%|████████████▎                         | 651/2000 [54:59<1:54:55,  5.11s/it]

Episode 651/2000, real env return = 2728.03


 33%|████████████▌                         | 661/2000 [55:51<1:55:20,  5.17s/it]

Episode 661/2000, real env return = 2699.20


 34%|████████████▋                         | 671/2000 [56:42<1:52:55,  5.10s/it]

Episode 671/2000, real env return = 2653.79


 34%|████████████▉                         | 681/2000 [57:33<1:51:48,  5.09s/it]

Episode 681/2000, real env return = 2667.19


 35%|█████████████▏                        | 691/2000 [58:24<1:52:00,  5.13s/it]

Episode 691/2000, real env return = 2928.25


 35%|█████████████▎                        | 701/2000 [59:16<1:50:40,  5.11s/it]

Episode 701/2000, real env return = 2685.01


 36%|████████████▊                       | 711/2000 [1:00:07<1:49:50,  5.11s/it]

Episode 711/2000, real env return = 2604.61


 36%|████████████▉                       | 721/2000 [1:00:58<1:48:25,  5.09s/it]

Episode 721/2000, real env return = 2600.38


 37%|█████████████▏                      | 731/2000 [1:01:49<1:48:08,  5.11s/it]

Episode 731/2000, real env return = 2683.77


 37%|█████████████▎                      | 741/2000 [1:02:40<1:48:15,  5.16s/it]

Episode 741/2000, real env return = 2777.60


 38%|█████████████▌                      | 751/2000 [1:03:31<1:46:17,  5.11s/it]

Episode 751/2000, real env return = 2812.84


 38%|█████████████▋                      | 761/2000 [1:04:23<1:45:38,  5.12s/it]

Episode 761/2000, real env return = 2824.92


 39%|█████████████▉                      | 771/2000 [1:05:14<1:46:14,  5.19s/it]

Episode 771/2000, real env return = 2808.41


 39%|██████████████                      | 781/2000 [1:06:06<1:43:58,  5.12s/it]

Episode 781/2000, real env return = 2930.45


 40%|██████████████▏                     | 791/2000 [1:06:57<1:43:11,  5.12s/it]

Episode 791/2000, real env return = 2844.43


 40%|██████████████▍                     | 801/2000 [1:07:49<1:43:26,  5.18s/it]

Episode 801/2000, real env return = 2900.49


 41%|██████████████▌                     | 811/2000 [1:08:41<1:42:13,  5.16s/it]

Episode 811/2000, real env return = 3285.65


 41%|██████████████▊                     | 821/2000 [1:09:33<1:41:39,  5.17s/it]

Episode 821/2000, real env return = 3145.36


 42%|██████████████▉                     | 831/2000 [1:10:25<1:41:32,  5.21s/it]

Episode 831/2000, real env return = 3316.39


 42%|███████████████▏                    | 841/2000 [1:11:16<1:38:49,  5.12s/it]

Episode 841/2000, real env return = 3248.15


 43%|███████████████▎                    | 851/2000 [1:12:08<1:38:36,  5.15s/it]

Episode 851/2000, real env return = 3387.69


 43%|███████████████▍                    | 861/2000 [1:12:59<1:37:08,  5.12s/it]

Episode 861/2000, real env return = 3406.70


 44%|███████████████▋                    | 871/2000 [1:13:51<1:39:00,  5.26s/it]

Episode 871/2000, real env return = 3363.92


 44%|███████████████▊                    | 881/2000 [1:14:42<1:36:40,  5.18s/it]

Episode 881/2000, real env return = 3374.56


 45%|████████████████                    | 891/2000 [1:15:33<1:34:41,  5.12s/it]

Episode 891/2000, real env return = 3438.79


 45%|████████████████▏                   | 901/2000 [1:16:25<1:35:07,  5.19s/it]

Episode 901/2000, real env return = 3527.49


 46%|████████████████▍                   | 911/2000 [1:17:17<1:34:15,  5.19s/it]

Episode 911/2000, real env return = 3572.05


 46%|████████████████▌                   | 921/2000 [1:18:09<1:32:40,  5.15s/it]

Episode 921/2000, real env return = 3556.25


 47%|████████████████▊                   | 931/2000 [1:19:00<1:32:31,  5.19s/it]

Episode 931/2000, real env return = 3372.35


 47%|████████████████▉                   | 941/2000 [1:19:53<1:30:53,  5.15s/it]

Episode 941/2000, real env return = 3689.47


 48%|█████████████████                   | 951/2000 [1:20:45<1:31:51,  5.25s/it]

Episode 951/2000, real env return = 3531.14


 48%|█████████████████▎                  | 961/2000 [1:21:36<1:28:50,  5.13s/it]

Episode 961/2000, real env return = 3550.06


 49%|█████████████████▍                  | 971/2000 [1:22:28<1:27:37,  5.11s/it]

Episode 971/2000, real env return = 3528.86


 49%|█████████████████▋                  | 981/2000 [1:23:19<1:28:20,  5.20s/it]

Episode 981/2000, real env return = 3610.96


 50%|█████████████████▊                  | 991/2000 [1:24:11<1:27:07,  5.18s/it]

Episode 991/2000, real env return = 3672.90


 50%|█████████████████▌                 | 1001/2000 [1:25:03<1:25:42,  5.15s/it]

Episode 1001/2000, real env return = 3654.62


 51%|█████████████████▋                 | 1011/2000 [1:25:54<1:24:54,  5.15s/it]

Episode 1011/2000, real env return = 3595.22


 51%|█████████████████▊                 | 1021/2000 [1:26:46<1:24:30,  5.18s/it]

Episode 1021/2000, real env return = 3777.02


 52%|██████████████████                 | 1031/2000 [1:27:38<1:23:21,  5.16s/it]

Episode 1031/2000, real env return = 3981.36


 52%|██████████████████▏                | 1041/2000 [1:28:29<1:23:15,  5.21s/it]

Episode 1041/2000, real env return = 3709.10


 53%|██████████████████▍                | 1051/2000 [1:29:22<1:23:28,  5.28s/it]

Episode 1051/2000, real env return = 3909.80


 53%|██████████████████▌                | 1061/2000 [1:30:14<1:21:05,  5.18s/it]

Episode 1061/2000, real env return = 3876.97


 54%|██████████████████▋                | 1071/2000 [1:31:05<1:19:19,  5.12s/it]

Episode 1071/2000, real env return = 3886.27


 54%|██████████████████▉                | 1081/2000 [1:31:56<1:18:13,  5.11s/it]

Episode 1081/2000, real env return = 3929.48


 55%|███████████████████                | 1091/2000 [1:32:47<1:17:34,  5.12s/it]

Episode 1091/2000, real env return = 3845.20


 55%|███████████████████▎               | 1101/2000 [1:33:39<1:17:21,  5.16s/it]

Episode 1101/2000, real env return = 4023.66


 56%|███████████████████▍               | 1111/2000 [1:34:30<1:15:33,  5.10s/it]

Episode 1111/2000, real env return = 4197.27


 56%|███████████████████▌               | 1121/2000 [1:35:22<1:15:07,  5.13s/it]

Episode 1121/2000, real env return = 3831.41


 57%|███████████████████▊               | 1131/2000 [1:36:13<1:14:45,  5.16s/it]

Episode 1131/2000, real env return = 3966.92


 57%|███████████████████▉               | 1141/2000 [1:37:05<1:13:43,  5.15s/it]

Episode 1141/2000, real env return = 3851.28


 58%|████████████████████▏              | 1151/2000 [1:37:56<1:12:52,  5.15s/it]

Episode 1151/2000, real env return = 3914.80


 58%|████████████████████▎              | 1161/2000 [1:38:48<1:11:57,  5.15s/it]

Episode 1161/2000, real env return = 3823.54


 59%|████████████████████▍              | 1171/2000 [1:39:39<1:11:29,  5.17s/it]

Episode 1171/2000, real env return = 4103.03


 59%|████████████████████▋              | 1181/2000 [1:40:31<1:11:35,  5.24s/it]

Episode 1181/2000, real env return = 3857.70


 60%|████████████████████▊              | 1191/2000 [1:41:23<1:09:30,  5.15s/it]

Episode 1191/2000, real env return = 3961.24


 60%|█████████████████████              | 1201/2000 [1:42:14<1:08:44,  5.16s/it]

Episode 1201/2000, real env return = 3899.51


 61%|█████████████████████▏             | 1211/2000 [1:43:06<1:07:40,  5.15s/it]

Episode 1211/2000, real env return = 3859.66


 61%|█████████████████████▎             | 1221/2000 [1:43:57<1:06:38,  5.13s/it]

Episode 1221/2000, real env return = 4106.76


 62%|█████████████████████▌             | 1231/2000 [1:44:49<1:05:58,  5.15s/it]

Episode 1231/2000, real env return = 3910.76


 62%|█████████████████████▋             | 1241/2000 [1:45:40<1:04:32,  5.10s/it]

Episode 1241/2000, real env return = 3995.53


 63%|█████████████████████▉             | 1251/2000 [1:46:32<1:03:40,  5.10s/it]

Episode 1251/2000, real env return = 4005.50


 63%|██████████████████████             | 1261/2000 [1:47:24<1:03:44,  5.18s/it]

Episode 1261/2000, real env return = 4177.02


 64%|██████████████████████▏            | 1271/2000 [1:48:15<1:02:49,  5.17s/it]

Episode 1271/2000, real env return = 3896.84


 64%|██████████████████████▍            | 1281/2000 [1:49:07<1:02:22,  5.21s/it]

Episode 1281/2000, real env return = 4083.18


 65%|██████████████████████▌            | 1291/2000 [1:50:00<1:02:09,  5.26s/it]

Episode 1291/2000, real env return = 3998.13


 65%|██████████████████████▊            | 1301/2000 [1:50:52<1:00:52,  5.23s/it]

Episode 1301/2000, real env return = 4156.01


 66%|████████████████████████▎            | 1311/2000 [1:51:44<58:49,  5.12s/it]

Episode 1311/2000, real env return = 4210.67


 66%|████████████████████████▍            | 1321/2000 [1:52:35<58:33,  5.17s/it]

Episode 1321/2000, real env return = 4010.30


 67%|████████████████████████▌            | 1331/2000 [1:53:26<57:48,  5.18s/it]

Episode 1331/2000, real env return = 4003.17


 67%|████████████████████████▊            | 1341/2000 [1:54:18<57:09,  5.20s/it]

Episode 1341/2000, real env return = 4026.42


 68%|████████████████████████▉            | 1351/2000 [1:55:10<55:25,  5.12s/it]

Episode 1351/2000, real env return = 4157.81


 68%|█████████████████████████▏           | 1361/2000 [1:56:01<54:25,  5.11s/it]

Episode 1361/2000, real env return = 4076.44


 69%|█████████████████████████▎           | 1371/2000 [1:56:52<53:49,  5.13s/it]

Episode 1371/2000, real env return = 3990.09


 69%|█████████████████████████▌           | 1381/2000 [1:57:46<55:32,  5.38s/it]

Episode 1381/2000, real env return = 4143.93


 70%|█████████████████████████▋           | 1391/2000 [1:58:37<52:21,  5.16s/it]

Episode 1391/2000, real env return = 4063.76


 70%|█████████████████████████▉           | 1401/2000 [1:59:29<51:50,  5.19s/it]

Episode 1401/2000, real env return = 3996.51


 71%|██████████████████████████           | 1411/2000 [2:00:21<50:39,  5.16s/it]

Episode 1411/2000, real env return = 3993.76


 71%|██████████████████████████▎          | 1421/2000 [2:01:12<49:37,  5.14s/it]

Episode 1421/2000, real env return = 4033.90


 72%|██████████████████████████▍          | 1431/2000 [2:02:03<48:33,  5.12s/it]

Episode 1431/2000, real env return = 4042.96


 72%|██████████████████████████▋          | 1441/2000 [2:02:55<48:14,  5.18s/it]

Episode 1441/2000, real env return = 4272.39


 73%|██████████████████████████▊          | 1451/2000 [2:03:47<47:16,  5.17s/it]

Episode 1451/2000, real env return = 4238.33


 73%|███████████████████████████          | 1461/2000 [2:04:39<46:56,  5.22s/it]

Episode 1461/2000, real env return = 4119.91


 74%|███████████████████████████▏         | 1471/2000 [2:05:31<46:17,  5.25s/it]

Episode 1471/2000, real env return = 4132.26


 74%|███████████████████████████▍         | 1481/2000 [2:06:23<44:41,  5.17s/it]

Episode 1481/2000, real env return = 4139.98


 75%|███████████████████████████▌         | 1491/2000 [2:07:14<43:57,  5.18s/it]

Episode 1491/2000, real env return = 4224.97


 75%|███████████████████████████▊         | 1501/2000 [2:08:06<42:48,  5.15s/it]

Episode 1501/2000, real env return = 4292.05


 76%|███████████████████████████▉         | 1511/2000 [2:08:57<42:05,  5.17s/it]

Episode 1511/2000, real env return = 4184.02


 76%|████████████████████████████▏        | 1521/2000 [2:09:49<41:05,  5.15s/it]

Episode 1521/2000, real env return = 4096.79


 77%|████████████████████████████▎        | 1531/2000 [2:10:41<40:35,  5.19s/it]

Episode 1531/2000, real env return = 4216.53


 77%|████████████████████████████▌        | 1541/2000 [2:11:34<39:43,  5.19s/it]

Episode 1541/2000, real env return = 4233.91


 78%|████████████████████████████▋        | 1551/2000 [2:12:25<38:39,  5.17s/it]

Episode 1551/2000, real env return = 4215.23


 78%|████████████████████████████▉        | 1561/2000 [2:13:17<38:11,  5.22s/it]

Episode 1561/2000, real env return = 4187.34


 79%|█████████████████████████████        | 1571/2000 [2:14:09<36:49,  5.15s/it]

Episode 1571/2000, real env return = 4158.62


 79%|█████████████████████████████▏       | 1581/2000 [2:15:00<36:10,  5.18s/it]

Episode 1581/2000, real env return = 4160.30


 80%|█████████████████████████████▍       | 1591/2000 [2:15:51<34:37,  5.08s/it]

Episode 1591/2000, real env return = 4100.25


 80%|█████████████████████████████▌       | 1601/2000 [2:16:43<34:20,  5.16s/it]

Episode 1601/2000, real env return = 4098.35


 81%|█████████████████████████████▊       | 1611/2000 [2:17:34<33:31,  5.17s/it]

Episode 1611/2000, real env return = 4185.04


 81%|█████████████████████████████▉       | 1621/2000 [2:18:26<32:27,  5.14s/it]

Episode 1621/2000, real env return = 4122.36


 82%|██████████████████████████████▏      | 1631/2000 [2:19:18<31:54,  5.19s/it]

Episode 1631/2000, real env return = 4221.79


 82%|██████████████████████████████▎      | 1641/2000 [2:20:09<30:46,  5.14s/it]

Episode 1641/2000, real env return = 4200.69


 83%|██████████████████████████████▌      | 1651/2000 [2:21:01<29:58,  5.15s/it]

Episode 1651/2000, real env return = 4315.16


 83%|██████████████████████████████▋      | 1661/2000 [2:21:52<28:46,  5.09s/it]

Episode 1661/2000, real env return = 4434.03


 84%|██████████████████████████████▉      | 1671/2000 [2:22:43<27:52,  5.08s/it]

Episode 1671/2000, real env return = 4278.39


 84%|███████████████████████████████      | 1681/2000 [2:23:35<27:17,  5.13s/it]

Episode 1681/2000, real env return = 4258.15


 85%|███████████████████████████████▎     | 1691/2000 [2:24:26<26:22,  5.12s/it]

Episode 1691/2000, real env return = 4379.61


 85%|███████████████████████████████▍     | 1701/2000 [2:25:17<25:50,  5.18s/it]

Episode 1701/2000, real env return = 4257.01


 86%|███████████████████████████████▋     | 1711/2000 [2:26:09<24:44,  5.14s/it]

Episode 1711/2000, real env return = 4247.19


 86%|███████████████████████████████▊     | 1721/2000 [2:27:01<24:22,  5.24s/it]

Episode 1721/2000, real env return = 4375.17


 87%|████████████████████████████████     | 1731/2000 [2:27:52<23:04,  5.15s/it]

Episode 1731/2000, real env return = 4142.17


 87%|████████████████████████████████▏    | 1741/2000 [2:28:43<22:08,  5.13s/it]

Episode 1741/2000, real env return = 4166.11


 88%|████████████████████████████████▍    | 1751/2000 [2:29:35<21:17,  5.13s/it]

Episode 1751/2000, real env return = 4277.55


 88%|████████████████████████████████▌    | 1761/2000 [2:30:27<20:40,  5.19s/it]

Episode 1761/2000, real env return = 4262.45


 89%|████████████████████████████████▊    | 1771/2000 [2:31:18<19:37,  5.14s/it]

Episode 1771/2000, real env return = 4187.88


 89%|████████████████████████████████▉    | 1781/2000 [2:32:10<19:03,  5.22s/it]

Episode 1781/2000, real env return = 4560.30


 90%|█████████████████████████████████▏   | 1791/2000 [2:33:02<17:47,  5.11s/it]

Episode 1791/2000, real env return = 4111.78


 90%|█████████████████████████████████▎   | 1801/2000 [2:33:53<17:08,  5.17s/it]

Episode 1801/2000, real env return = 4262.66


 91%|█████████████████████████████████▌   | 1811/2000 [2:34:45<16:22,  5.20s/it]

Episode 1811/2000, real env return = 4178.63


 91%|█████████████████████████████████▋   | 1821/2000 [2:35:37<15:39,  5.25s/it]

Episode 1821/2000, real env return = 4324.02


 92%|█████████████████████████████████▊   | 1831/2000 [2:36:28<14:26,  5.13s/it]

Episode 1831/2000, real env return = 4165.05


 92%|██████████████████████████████████   | 1841/2000 [2:37:20<13:36,  5.13s/it]

Episode 1841/2000, real env return = 4355.72


 93%|██████████████████████████████████▏  | 1851/2000 [2:38:11<12:46,  5.14s/it]

Episode 1851/2000, real env return = 3997.16


 93%|██████████████████████████████████▍  | 1861/2000 [2:39:03<12:03,  5.21s/it]

Episode 1861/2000, real env return = 4150.70


 94%|██████████████████████████████████▌  | 1871/2000 [2:39:55<11:22,  5.29s/it]

Episode 1871/2000, real env return = 4327.52


 94%|██████████████████████████████████▊  | 1881/2000 [2:40:48<10:33,  5.32s/it]

Episode 1881/2000, real env return = 4335.75


 95%|██████████████████████████████████▉  | 1891/2000 [2:41:39<09:16,  5.11s/it]

Episode 1891/2000, real env return = 4104.87


 95%|███████████████████████████████████▏ | 1901/2000 [2:42:31<08:31,  5.17s/it]

Episode 1901/2000, real env return = 4269.18


 96%|███████████████████████████████████▎ | 1911/2000 [2:43:22<07:36,  5.13s/it]

Episode 1911/2000, real env return = 4235.40


 96%|███████████████████████████████████▌ | 1921/2000 [2:44:14<06:43,  5.11s/it]

Episode 1921/2000, real env return = 3987.66


 97%|███████████████████████████████████▋ | 1931/2000 [2:45:05<05:54,  5.14s/it]

Episode 1931/2000, real env return = 4226.95


 97%|███████████████████████████████████▉ | 1941/2000 [2:45:57<05:04,  5.17s/it]

Episode 1941/2000, real env return = 4224.10


 98%|████████████████████████████████████ | 1951/2000 [2:46:49<04:11,  5.13s/it]

Episode 1951/2000, real env return = 4418.17


 98%|████████████████████████████████████▎| 1961/2000 [2:47:41<03:23,  5.21s/it]

Episode 1961/2000, real env return = 4334.48


 99%|████████████████████████████████████▍| 1971/2000 [2:48:32<02:29,  5.16s/it]

Episode 1971/2000, real env return = 4397.72


 99%|████████████████████████████████████▋| 1981/2000 [2:49:24<01:37,  5.15s/it]

Episode 1981/2000, real env return = 4320.54


100%|████████████████████████████████████▊| 1991/2000 [2:50:15<00:46,  5.14s/it]

Episode 1991/2000, real env return = 4144.32


100%|█████████████████████████████████████| 2000/2000 [2:51:01<00:00,  5.13s/it]


Training finished.


In [5]:
train4 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<10:42,  3.11it/s]

Episode 1/2000, real env return = -187.45


  1%|▏                                      | 11/2000 [00:15<1:29:39,  2.70s/it]

Episode 11/2000, real env return = -388.68


  1%|▍                                      | 21/2000 [01:04<2:45:40,  5.02s/it]

Episode 21/2000, real env return = -348.66


  2%|▌                                      | 31/2000 [01:55<2:48:30,  5.13s/it]

Episode 31/2000, real env return = -155.37


  2%|▊                                      | 41/2000 [02:46<2:47:16,  5.12s/it]

Episode 41/2000, real env return = -377.85


  3%|▉                                      | 51/2000 [03:36<2:47:03,  5.14s/it]

Episode 51/2000, real env return = -416.34


  3%|█▏                                     | 61/2000 [04:28<2:46:25,  5.15s/it]

Episode 61/2000, real env return = -437.94


  4%|█▍                                     | 71/2000 [05:18<2:44:55,  5.13s/it]

Episode 71/2000, real env return = -427.70


  4%|█▌                                     | 81/2000 [06:09<2:43:32,  5.11s/it]

Episode 81/2000, real env return = -431.48


  5%|█▊                                     | 91/2000 [07:00<2:42:19,  5.10s/it]

Episode 91/2000, real env return = -69.53


  5%|█▉                                    | 101/2000 [07:51<2:41:13,  5.09s/it]

Episode 101/2000, real env return = -420.61


  6%|██                                    | 111/2000 [08:43<2:41:25,  5.13s/it]

Episode 111/2000, real env return = -537.35


  6%|██▎                                   | 121/2000 [09:34<2:40:47,  5.13s/it]

Episode 121/2000, real env return = -464.62


  7%|██▍                                   | 131/2000 [10:25<2:39:48,  5.13s/it]

Episode 131/2000, real env return = -462.50


  7%|██▋                                   | 141/2000 [11:17<2:37:51,  5.10s/it]

Episode 141/2000, real env return = -363.53


  8%|██▊                                   | 151/2000 [12:08<2:37:57,  5.13s/it]

Episode 151/2000, real env return = -360.12


  8%|███                                   | 161/2000 [12:59<2:37:11,  5.13s/it]

Episode 161/2000, real env return = -324.61


  9%|███▏                                  | 171/2000 [13:51<2:38:37,  5.20s/it]

Episode 171/2000, real env return = -376.38


  9%|███▍                                  | 181/2000 [14:43<2:37:44,  5.20s/it]

Episode 181/2000, real env return = -358.33


 10%|███▋                                  | 191/2000 [15:35<2:35:10,  5.15s/it]

Episode 191/2000, real env return = -390.14


 10%|███▊                                  | 201/2000 [16:27<2:34:19,  5.15s/it]

Episode 201/2000, real env return = -338.27


 11%|████                                  | 211/2000 [17:18<2:32:32,  5.12s/it]

Episode 211/2000, real env return = -333.29


 11%|████▏                                 | 221/2000 [18:10<2:34:07,  5.20s/it]

Episode 221/2000, real env return = -322.62


 12%|████▍                                 | 231/2000 [19:01<2:31:25,  5.14s/it]

Episode 231/2000, real env return = -298.83


 12%|████▌                                 | 241/2000 [19:53<2:31:33,  5.17s/it]

Episode 241/2000, real env return = -242.12


 13%|████▊                                 | 251/2000 [20:45<2:31:15,  5.19s/it]

Episode 251/2000, real env return = -178.99


 13%|████▉                                 | 261/2000 [21:37<2:30:37,  5.20s/it]

Episode 261/2000, real env return = -195.73


 14%|█████▏                                | 271/2000 [22:30<2:30:15,  5.21s/it]

Episode 271/2000, real env return = -167.95


 14%|█████▎                                | 281/2000 [23:21<2:27:29,  5.15s/it]

Episode 281/2000, real env return = -215.64


 15%|█████▌                                | 291/2000 [24:13<2:26:44,  5.15s/it]

Episode 291/2000, real env return = -182.79


 15%|█████▋                                | 301/2000 [25:05<2:29:39,  5.29s/it]

Episode 301/2000, real env return = -235.56


 16%|█████▉                                | 311/2000 [25:57<2:26:56,  5.22s/it]

Episode 311/2000, real env return = -169.49


 16%|██████                                | 321/2000 [26:49<2:24:33,  5.17s/it]

Episode 321/2000, real env return = -312.31


 17%|██████▎                               | 331/2000 [27:41<2:24:27,  5.19s/it]

Episode 331/2000, real env return = -165.73


 17%|██████▍                               | 341/2000 [28:33<2:23:17,  5.18s/it]

Episode 341/2000, real env return = -191.72


 18%|██████▋                               | 351/2000 [29:25<2:23:46,  5.23s/it]

Episode 351/2000, real env return = -107.87


 18%|██████▊                               | 361/2000 [30:17<2:20:38,  5.15s/it]

Episode 361/2000, real env return = -138.64


 19%|███████                               | 371/2000 [31:09<2:22:34,  5.25s/it]

Episode 371/2000, real env return = -126.24


 19%|███████▏                              | 381/2000 [32:01<2:20:38,  5.21s/it]

Episode 381/2000, real env return = -324.61


 20%|███████▍                              | 391/2000 [32:53<2:19:30,  5.20s/it]

Episode 391/2000, real env return = -223.71


 20%|███████▌                              | 401/2000 [33:46<2:19:47,  5.25s/it]

Episode 401/2000, real env return = -175.55


 21%|███████▊                              | 411/2000 [34:38<2:19:08,  5.25s/it]

Episode 411/2000, real env return = -196.21


 21%|███████▉                              | 421/2000 [35:31<2:18:12,  5.25s/it]

Episode 421/2000, real env return = -102.50


 22%|████████▏                             | 431/2000 [36:23<2:18:04,  5.28s/it]

Episode 431/2000, real env return = -87.70


 22%|████████▍                             | 441/2000 [37:15<2:15:20,  5.21s/it]

Episode 441/2000, real env return = -168.33


 23%|████████▌                             | 451/2000 [38:07<2:15:36,  5.25s/it]

Episode 451/2000, real env return = -152.86


 23%|████████▊                             | 461/2000 [38:59<2:13:43,  5.21s/it]

Episode 461/2000, real env return = -164.18


 24%|████████▉                             | 471/2000 [39:52<2:15:42,  5.33s/it]

Episode 471/2000, real env return = -137.72


 24%|█████████▏                            | 481/2000 [40:45<2:13:54,  5.29s/it]

Episode 481/2000, real env return = -124.63


 25%|█████████▎                            | 491/2000 [41:37<2:11:06,  5.21s/it]

Episode 491/2000, real env return = -133.34


 25%|█████████▌                            | 501/2000 [42:29<2:09:22,  5.18s/it]

Episode 501/2000, real env return = -151.25


 26%|█████████▋                            | 511/2000 [43:21<2:09:42,  5.23s/it]

Episode 511/2000, real env return = -144.23


 26%|█████████▉                            | 521/2000 [44:13<2:08:44,  5.22s/it]

Episode 521/2000, real env return = -180.31


 27%|██████████                            | 531/2000 [45:06<2:10:38,  5.34s/it]

Episode 531/2000, real env return = -135.25


 27%|██████████▎                           | 541/2000 [45:59<2:08:31,  5.29s/it]

Episode 541/2000, real env return = -152.47


 28%|██████████▍                           | 551/2000 [46:51<2:07:02,  5.26s/it]

Episode 551/2000, real env return = -122.46


 28%|██████████▋                           | 561/2000 [47:43<2:03:58,  5.17s/it]

Episode 561/2000, real env return = -161.56


 29%|██████████▊                           | 571/2000 [48:35<2:04:42,  5.24s/it]

Episode 571/2000, real env return = -102.68


 29%|███████████                           | 581/2000 [49:27<2:03:16,  5.21s/it]

Episode 581/2000, real env return = -55.96


 30%|███████████▏                          | 591/2000 [50:18<1:59:43,  5.10s/it]

Episode 591/2000, real env return = -100.62


 30%|███████████▍                          | 601/2000 [51:09<1:58:44,  5.09s/it]

Episode 601/2000, real env return = -34.72


 31%|███████████▌                          | 611/2000 [52:00<1:57:25,  5.07s/it]

Episode 611/2000, real env return = -124.64


 31%|███████████▊                          | 621/2000 [52:51<1:56:23,  5.06s/it]

Episode 621/2000, real env return = -566.83


 32%|███████████▉                          | 631/2000 [53:42<1:56:26,  5.10s/it]

Episode 631/2000, real env return = 116.36


 32%|████████████▏                         | 641/2000 [54:33<1:55:38,  5.11s/it]

Episode 641/2000, real env return = 138.84


 33%|████████████▎                         | 651/2000 [55:24<1:55:09,  5.12s/it]

Episode 651/2000, real env return = 42.55


 33%|████████████▌                         | 661/2000 [56:15<1:53:58,  5.11s/it]

Episode 661/2000, real env return = 46.08


 34%|████████████▋                         | 671/2000 [57:06<1:54:07,  5.15s/it]

Episode 671/2000, real env return = 34.62


 34%|████████████▉                         | 681/2000 [57:57<1:53:24,  5.16s/it]

Episode 681/2000, real env return = -21.27


 35%|█████████████▏                        | 691/2000 [58:48<1:50:45,  5.08s/it]

Episode 691/2000, real env return = 143.03


 35%|█████████████▎                        | 701/2000 [59:39<1:50:32,  5.11s/it]

Episode 701/2000, real env return = 55.87


 36%|████████████▊                       | 711/2000 [1:00:30<1:49:51,  5.11s/it]

Episode 711/2000, real env return = 74.24


 36%|████████████▉                       | 721/2000 [1:01:21<1:48:31,  5.09s/it]

Episode 721/2000, real env return = 66.33


 37%|█████████████▏                      | 731/2000 [1:02:12<1:46:32,  5.04s/it]

Episode 731/2000, real env return = 75.54


 37%|█████████████▎                      | 741/2000 [1:03:03<1:47:19,  5.11s/it]

Episode 741/2000, real env return = 144.04


 38%|█████████████▌                      | 751/2000 [1:03:54<1:46:23,  5.11s/it]

Episode 751/2000, real env return = 265.23


 38%|█████████████▋                      | 761/2000 [1:04:45<1:44:56,  5.08s/it]

Episode 761/2000, real env return = 575.79


 39%|█████████████▉                      | 771/2000 [1:05:36<1:43:44,  5.06s/it]

Episode 771/2000, real env return = 564.12


 39%|██████████████                      | 781/2000 [1:06:27<1:43:47,  5.11s/it]

Episode 781/2000, real env return = 564.24


 40%|██████████████▏                     | 791/2000 [1:07:18<1:42:34,  5.09s/it]

Episode 791/2000, real env return = 864.82


 40%|██████████████▍                     | 801/2000 [1:08:09<1:41:42,  5.09s/it]

Episode 801/2000, real env return = 840.19


 41%|██████████████▌                     | 811/2000 [1:09:00<1:40:35,  5.08s/it]

Episode 811/2000, real env return = 980.10


 41%|██████████████▊                     | 821/2000 [1:09:51<1:40:25,  5.11s/it]

Episode 821/2000, real env return = 1025.67


 42%|██████████████▉                     | 831/2000 [1:10:42<1:39:48,  5.12s/it]

Episode 831/2000, real env return = 1126.39


 42%|███████████████▏                    | 841/2000 [1:11:33<1:38:25,  5.10s/it]

Episode 841/2000, real env return = 505.80


 43%|███████████████▎                    | 851/2000 [1:12:24<1:37:09,  5.07s/it]

Episode 851/2000, real env return = 551.97


 43%|███████████████▍                    | 861/2000 [1:13:15<1:38:18,  5.18s/it]

Episode 861/2000, real env return = 504.91


 44%|███████████████▋                    | 871/2000 [1:14:06<1:35:04,  5.05s/it]

Episode 871/2000, real env return = 433.62


 44%|███████████████▊                    | 881/2000 [1:14:57<1:34:48,  5.08s/it]

Episode 881/2000, real env return = 457.18


 45%|████████████████                    | 891/2000 [1:15:48<1:33:54,  5.08s/it]

Episode 891/2000, real env return = 518.52


 45%|████████████████▏                   | 901/2000 [1:16:39<1:33:07,  5.08s/it]

Episode 901/2000, real env return = 601.01


 46%|████████████████▍                   | 911/2000 [1:17:30<1:31:43,  5.05s/it]

Episode 911/2000, real env return = 662.25


 46%|████████████████▌                   | 921/2000 [1:18:21<1:31:41,  5.10s/it]

Episode 921/2000, real env return = 701.40


 47%|████████████████▊                   | 931/2000 [1:19:12<1:30:27,  5.08s/it]

Episode 931/2000, real env return = 602.46


 47%|████████████████▉                   | 941/2000 [1:20:03<1:30:05,  5.10s/it]

Episode 941/2000, real env return = 867.98


 48%|█████████████████                   | 951/2000 [1:20:54<1:29:15,  5.11s/it]

Episode 951/2000, real env return = 942.80


 48%|█████████████████▎                  | 961/2000 [1:21:45<1:28:07,  5.09s/it]

Episode 961/2000, real env return = 581.51


 49%|█████████████████▍                  | 971/2000 [1:22:36<1:27:25,  5.10s/it]

Episode 971/2000, real env return = 1182.07


 49%|█████████████████▋                  | 981/2000 [1:23:27<1:26:39,  5.10s/it]

Episode 981/2000, real env return = 1042.04


 50%|█████████████████▊                  | 991/2000 [1:24:17<1:25:19,  5.07s/it]

Episode 991/2000, real env return = 795.31


 50%|█████████████████▌                 | 1001/2000 [1:25:08<1:24:18,  5.06s/it]

Episode 1001/2000, real env return = 746.40


 51%|█████████████████▋                 | 1011/2000 [1:25:59<1:24:38,  5.13s/it]

Episode 1011/2000, real env return = 930.90


 51%|█████████████████▊                 | 1021/2000 [1:26:51<1:23:10,  5.10s/it]

Episode 1021/2000, real env return = 825.22


 52%|██████████████████                 | 1031/2000 [1:27:42<1:22:44,  5.12s/it]

Episode 1031/2000, real env return = 836.74


 52%|██████████████████▏                | 1041/2000 [1:28:33<1:22:32,  5.16s/it]

Episode 1041/2000, real env return = 783.52


 53%|██████████████████▍                | 1051/2000 [1:29:24<1:21:18,  5.14s/it]

Episode 1051/2000, real env return = 834.33


 53%|██████████████████▌                | 1061/2000 [1:30:15<1:19:50,  5.10s/it]

Episode 1061/2000, real env return = 1121.92


 54%|██████████████████▋                | 1071/2000 [1:31:06<1:18:43,  5.08s/it]

Episode 1071/2000, real env return = 1210.73


 54%|██████████████████▉                | 1081/2000 [1:31:57<1:17:46,  5.08s/it]

Episode 1081/2000, real env return = 1047.51


 55%|███████████████████                | 1091/2000 [1:32:48<1:16:35,  5.06s/it]

Episode 1091/2000, real env return = 967.47


 55%|███████████████████▎               | 1101/2000 [1:33:39<1:16:25,  5.10s/it]

Episode 1101/2000, real env return = 1192.14


 56%|███████████████████▍               | 1111/2000 [1:34:30<1:14:58,  5.06s/it]

Episode 1111/2000, real env return = 1124.60


 56%|███████████████████▌               | 1121/2000 [1:35:21<1:15:05,  5.13s/it]

Episode 1121/2000, real env return = 1126.08


 57%|███████████████████▊               | 1131/2000 [1:36:12<1:14:33,  5.15s/it]

Episode 1131/2000, real env return = 1168.00


 57%|███████████████████▉               | 1141/2000 [1:37:03<1:13:20,  5.12s/it]

Episode 1141/2000, real env return = 1167.33


 58%|████████████████████▏              | 1151/2000 [1:37:54<1:12:27,  5.12s/it]

Episode 1151/2000, real env return = 1188.05


 58%|████████████████████▎              | 1161/2000 [1:38:45<1:11:29,  5.11s/it]

Episode 1161/2000, real env return = 1112.67


 59%|████████████████████▍              | 1171/2000 [1:39:36<1:10:14,  5.08s/it]

Episode 1171/2000, real env return = 1207.49


 59%|████████████████████▋              | 1181/2000 [1:40:27<1:08:18,  5.00s/it]

Episode 1181/2000, real env return = 1103.61


 60%|████████████████████▊              | 1191/2000 [1:41:18<1:08:17,  5.07s/it]

Episode 1191/2000, real env return = 1114.25


 60%|█████████████████████              | 1201/2000 [1:42:09<1:07:16,  5.05s/it]

Episode 1201/2000, real env return = 1339.17


 61%|█████████████████████▏             | 1211/2000 [1:42:59<1:06:57,  5.09s/it]

Episode 1211/2000, real env return = 1211.16


 61%|█████████████████████▎             | 1221/2000 [1:43:51<1:06:45,  5.14s/it]

Episode 1221/2000, real env return = 1159.24


 62%|█████████████████████▌             | 1231/2000 [1:44:42<1:05:27,  5.11s/it]

Episode 1231/2000, real env return = 1263.09


 62%|█████████████████████▋             | 1241/2000 [1:45:33<1:04:38,  5.11s/it]

Episode 1241/2000, real env return = 1416.17


 63%|█████████████████████▉             | 1251/2000 [1:46:24<1:03:32,  5.09s/it]

Episode 1251/2000, real env return = 1379.32


 63%|██████████████████████             | 1261/2000 [1:47:15<1:02:45,  5.09s/it]

Episode 1261/2000, real env return = 1414.98


 64%|██████████████████████▏            | 1271/2000 [1:48:06<1:01:38,  5.07s/it]

Episode 1271/2000, real env return = 1458.03


 64%|██████████████████████▍            | 1281/2000 [1:48:57<1:01:21,  5.12s/it]

Episode 1281/2000, real env return = 1480.65


 65%|██████████████████████▌            | 1291/2000 [1:49:48<1:00:18,  5.10s/it]

Episode 1291/2000, real env return = 1413.41


 65%|████████████████████████             | 1301/2000 [1:50:39<59:50,  5.14s/it]

Episode 1301/2000, real env return = 1494.98


 66%|████████████████████████▎            | 1311/2000 [1:51:31<59:13,  5.16s/it]

Episode 1311/2000, real env return = 1628.38


 66%|████████████████████████▍            | 1321/2000 [1:52:21<57:46,  5.10s/it]

Episode 1321/2000, real env return = 1645.71


 67%|████████████████████████▌            | 1331/2000 [1:53:12<56:42,  5.09s/it]

Episode 1331/2000, real env return = 1586.10


 67%|████████████████████████▊            | 1341/2000 [1:54:03<55:47,  5.08s/it]

Episode 1341/2000, real env return = 1561.98


 68%|████████████████████████▉            | 1351/2000 [1:54:54<54:55,  5.08s/it]

Episode 1351/2000, real env return = 1628.81


 68%|█████████████████████████▏           | 1361/2000 [1:55:45<54:12,  5.09s/it]

Episode 1361/2000, real env return = 1681.24


 69%|█████████████████████████▎           | 1371/2000 [1:56:37<53:33,  5.11s/it]

Episode 1371/2000, real env return = 1847.55


 69%|█████████████████████████▌           | 1381/2000 [1:57:28<52:56,  5.13s/it]

Episode 1381/2000, real env return = 2021.40


 70%|█████████████████████████▋           | 1391/2000 [1:58:19<51:54,  5.11s/it]

Episode 1391/2000, real env return = 2015.87


 70%|█████████████████████████▉           | 1401/2000 [1:59:10<51:01,  5.11s/it]

Episode 1401/2000, real env return = 1895.88


 71%|██████████████████████████           | 1411/2000 [2:00:01<49:59,  5.09s/it]

Episode 1411/2000, real env return = 1880.24


 71%|██████████████████████████▎          | 1421/2000 [2:00:52<49:07,  5.09s/it]

Episode 1421/2000, real env return = 2003.18


 72%|██████████████████████████▍          | 1431/2000 [2:01:43<48:05,  5.07s/it]

Episode 1431/2000, real env return = 2033.03


 72%|██████████████████████████▋          | 1441/2000 [2:02:34<47:10,  5.06s/it]

Episode 1441/2000, real env return = 1937.06


 73%|██████████████████████████▊          | 1451/2000 [2:03:25<46:32,  5.09s/it]

Episode 1451/2000, real env return = 2163.44


 73%|███████████████████████████          | 1461/2000 [2:04:16<45:45,  5.09s/it]

Episode 1461/2000, real env return = 2297.20


 74%|███████████████████████████▏         | 1471/2000 [2:05:07<45:06,  5.12s/it]

Episode 1471/2000, real env return = 2072.63


 74%|███████████████████████████▍         | 1481/2000 [2:05:58<44:23,  5.13s/it]

Episode 1481/2000, real env return = 2168.06


 75%|███████████████████████████▌         | 1491/2000 [2:06:49<43:37,  5.14s/it]

Episode 1491/2000, real env return = 2180.55


 75%|███████████████████████████▊         | 1501/2000 [2:07:39<42:11,  5.07s/it]

Episode 1501/2000, real env return = 2238.88


 76%|███████████████████████████▉         | 1511/2000 [2:08:30<41:38,  5.11s/it]

Episode 1511/2000, real env return = 2239.38


 76%|████████████████████████████▏        | 1521/2000 [2:09:21<39:55,  5.00s/it]

Episode 1521/2000, real env return = 2229.82


 77%|████████████████████████████▎        | 1531/2000 [2:10:12<39:28,  5.05s/it]

Episode 1531/2000, real env return = 2321.34


 77%|████████████████████████████▌        | 1541/2000 [2:11:03<39:15,  5.13s/it]

Episode 1541/2000, real env return = 2406.82


 78%|████████████████████████████▋        | 1551/2000 [2:11:54<38:18,  5.12s/it]

Episode 1551/2000, real env return = 2476.67


 78%|████████████████████████████▉        | 1561/2000 [2:12:45<37:07,  5.07s/it]

Episode 1561/2000, real env return = 2509.34


 79%|█████████████████████████████        | 1571/2000 [2:13:36<36:39,  5.13s/it]

Episode 1571/2000, real env return = 2537.43


 79%|█████████████████████████████▏       | 1581/2000 [2:14:26<35:22,  5.07s/it]

Episode 1581/2000, real env return = 2514.87


 80%|█████████████████████████████▍       | 1591/2000 [2:15:17<34:26,  5.05s/it]

Episode 1591/2000, real env return = 2503.85


 80%|█████████████████████████████▌       | 1601/2000 [2:16:08<33:36,  5.05s/it]

Episode 1601/2000, real env return = 2466.69


 81%|█████████████████████████████▊       | 1611/2000 [2:16:59<32:55,  5.08s/it]

Episode 1611/2000, real env return = 2514.58


 81%|█████████████████████████████▉       | 1621/2000 [2:17:50<32:16,  5.11s/it]

Episode 1621/2000, real env return = 2416.57


 82%|██████████████████████████████▏      | 1631/2000 [2:18:41<31:31,  5.13s/it]

Episode 1631/2000, real env return = 2821.15


 82%|██████████████████████████████▎      | 1641/2000 [2:19:32<30:49,  5.15s/it]

Episode 1641/2000, real env return = 2656.86


 83%|██████████████████████████████▌      | 1651/2000 [2:20:23<29:57,  5.15s/it]

Episode 1651/2000, real env return = 2611.84


 83%|██████████████████████████████▋      | 1661/2000 [2:21:14<28:46,  5.09s/it]

Episode 1661/2000, real env return = 2647.32


 84%|██████████████████████████████▉      | 1671/2000 [2:22:04<27:56,  5.10s/it]

Episode 1671/2000, real env return = 2875.57


 84%|███████████████████████████████      | 1681/2000 [2:22:55<27:04,  5.09s/it]

Episode 1681/2000, real env return = 2920.00


 85%|███████████████████████████████▎     | 1691/2000 [2:23:46<26:07,  5.07s/it]

Episode 1691/2000, real env return = 2926.54


 85%|███████████████████████████████▍     | 1701/2000 [2:24:37<25:23,  5.10s/it]

Episode 1701/2000, real env return = 3013.80


 86%|███████████████████████████████▋     | 1711/2000 [2:25:28<24:32,  5.10s/it]

Episode 1711/2000, real env return = 2962.55


 86%|███████████████████████████████▊     | 1721/2000 [2:26:19<23:42,  5.10s/it]

Episode 1721/2000, real env return = 3116.36


 87%|████████████████████████████████     | 1731/2000 [2:27:10<23:09,  5.16s/it]

Episode 1731/2000, real env return = 3105.05


 87%|████████████████████████████████▏    | 1741/2000 [2:28:01<22:02,  5.10s/it]

Episode 1741/2000, real env return = 3071.08


 88%|████████████████████████████████▍    | 1751/2000 [2:28:52<21:07,  5.09s/it]

Episode 1751/2000, real env return = 3241.74


 88%|████████████████████████████████▌    | 1761/2000 [2:29:43<20:09,  5.06s/it]

Episode 1761/2000, real env return = 3246.53


 89%|████████████████████████████████▊    | 1771/2000 [2:30:34<19:14,  5.04s/it]

Episode 1771/2000, real env return = 3235.41


 89%|████████████████████████████████▉    | 1781/2000 [2:31:25<18:39,  5.11s/it]

Episode 1781/2000, real env return = 3318.50


 90%|█████████████████████████████████▏   | 1791/2000 [2:32:16<17:51,  5.13s/it]

Episode 1791/2000, real env return = 3181.99


 90%|█████████████████████████████████▎   | 1801/2000 [2:33:07<16:57,  5.11s/it]

Episode 1801/2000, real env return = 3371.68


 91%|█████████████████████████████████▌   | 1811/2000 [2:33:58<16:12,  5.15s/it]

Episode 1811/2000, real env return = 3239.68


 91%|█████████████████████████████████▋   | 1821/2000 [2:34:49<15:25,  5.17s/it]

Episode 1821/2000, real env return = 3207.44


 92%|█████████████████████████████████▊   | 1831/2000 [2:35:40<14:18,  5.08s/it]

Episode 1831/2000, real env return = 3532.14


 92%|██████████████████████████████████   | 1841/2000 [2:36:31<13:30,  5.10s/it]

Episode 1841/2000, real env return = 3500.65


 93%|██████████████████████████████████▏  | 1851/2000 [2:37:22<12:38,  5.09s/it]

Episode 1851/2000, real env return = 3445.84


 93%|██████████████████████████████████▍  | 1861/2000 [2:38:13<11:44,  5.07s/it]

Episode 1861/2000, real env return = 3570.57


 94%|██████████████████████████████████▌  | 1871/2000 [2:39:04<10:51,  5.05s/it]

Episode 1871/2000, real env return = 3419.14


 94%|██████████████████████████████████▊  | 1881/2000 [2:39:55<10:05,  5.09s/it]

Episode 1881/2000, real env return = 3577.33


 95%|██████████████████████████████████▉  | 1891/2000 [2:40:46<09:20,  5.14s/it]

Episode 1891/2000, real env return = 3540.99


 95%|███████████████████████████████████▏ | 1901/2000 [2:41:37<08:29,  5.14s/it]

Episode 1901/2000, real env return = 3750.18


 96%|███████████████████████████████████▎ | 1911/2000 [2:42:27<07:29,  5.05s/it]

Episode 1911/2000, real env return = 3755.39


 96%|███████████████████████████████████▌ | 1921/2000 [2:43:18<06:40,  5.07s/it]

Episode 1921/2000, real env return = 3826.60


 97%|███████████████████████████████████▋ | 1931/2000 [2:44:09<05:50,  5.09s/it]

Episode 1931/2000, real env return = 3832.31


 97%|███████████████████████████████████▉ | 1941/2000 [2:45:00<04:58,  5.05s/it]

Episode 1941/2000, real env return = 3842.91


 98%|████████████████████████████████████ | 1951/2000 [2:45:51<04:08,  5.07s/it]

Episode 1951/2000, real env return = 4182.17


 98%|████████████████████████████████████▎| 1961/2000 [2:46:43<03:19,  5.13s/it]

Episode 1961/2000, real env return = 3970.53


 99%|████████████████████████████████████▍| 1971/2000 [2:47:34<02:27,  5.08s/it]

Episode 1971/2000, real env return = 3893.91


 99%|████████████████████████████████████▋| 1981/2000 [2:48:25<01:38,  5.16s/it]

Episode 1981/2000, real env return = 3705.60


100%|████████████████████████████████████▊| 1991/2000 [2:49:16<00:46,  5.12s/it]

Episode 1991/2000, real env return = 3986.97


100%|█████████████████████████████████████| 2000/2000 [2:50:01<00:00,  5.10s/it]


Training finished.


In [11]:
train5 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<26:10,  1.27it/s]

Episode 1/2000, real env return = -241.58


  1%|▏                                      | 11/2000 [00:25<2:18:06,  4.17s/it]

Episode 11/2000, real env return = -333.59


  1%|▍                                      | 21/2000 [01:16<2:46:00,  5.03s/it]

Episode 21/2000, real env return = -184.95


  2%|▌                                      | 31/2000 [02:06<2:45:50,  5.05s/it]

Episode 31/2000, real env return = -288.14


  2%|▊                                      | 41/2000 [02:57<2:44:03,  5.02s/it]

Episode 41/2000, real env return = -361.94


  3%|▉                                      | 51/2000 [03:47<2:43:50,  5.04s/it]

Episode 51/2000, real env return = -543.57


  3%|█▏                                     | 61/2000 [04:38<2:43:21,  5.05s/it]

Episode 61/2000, real env return = -526.31


  4%|█▍                                     | 71/2000 [05:28<2:42:20,  5.05s/it]

Episode 71/2000, real env return = -501.88


  4%|█▌                                     | 81/2000 [06:19<2:41:47,  5.06s/it]

Episode 81/2000, real env return = -520.89


  5%|█▊                                     | 91/2000 [07:10<2:42:18,  5.10s/it]

Episode 91/2000, real env return = -456.77


  5%|█▉                                    | 101/2000 [08:01<2:40:55,  5.08s/it]

Episode 101/2000, real env return = -427.78


  6%|██                                    | 111/2000 [08:52<2:41:05,  5.12s/it]

Episode 111/2000, real env return = -491.77


  6%|██▎                                   | 121/2000 [09:42<2:38:48,  5.07s/it]

Episode 121/2000, real env return = -501.57


  7%|██▍                                   | 131/2000 [10:33<2:36:19,  5.02s/it]

Episode 131/2000, real env return = -472.85


  7%|██▋                                   | 141/2000 [11:24<2:36:25,  5.05s/it]

Episode 141/2000, real env return = -439.53


  8%|██▊                                   | 151/2000 [12:14<2:35:58,  5.06s/it]

Episode 151/2000, real env return = -410.02


  8%|███                                   | 161/2000 [13:05<2:35:00,  5.06s/it]

Episode 161/2000, real env return = -437.98


  9%|███▏                                  | 171/2000 [13:56<2:34:53,  5.08s/it]

Episode 171/2000, real env return = -328.39


  9%|███▍                                  | 181/2000 [14:46<2:34:17,  5.09s/it]

Episode 181/2000, real env return = -374.79


 10%|███▋                                  | 191/2000 [15:37<2:34:53,  5.14s/it]

Episode 191/2000, real env return = -472.87


 10%|███▊                                  | 201/2000 [16:28<2:31:41,  5.06s/it]

Episode 201/2000, real env return = -314.64


 11%|████                                  | 211/2000 [17:19<2:32:13,  5.11s/it]

Episode 211/2000, real env return = -222.11


 11%|████▏                                 | 221/2000 [18:10<2:31:07,  5.10s/it]

Episode 221/2000, real env return = -276.36


 12%|████▍                                 | 231/2000 [19:02<2:35:23,  5.27s/it]

Episode 231/2000, real env return = -228.76


 12%|████▌                                 | 241/2000 [19:53<2:29:07,  5.09s/it]

Episode 241/2000, real env return = -162.68


 13%|████▊                                 | 251/2000 [20:44<2:28:38,  5.10s/it]

Episode 251/2000, real env return = -233.16


 13%|████▉                                 | 261/2000 [21:35<2:26:54,  5.07s/it]

Episode 261/2000, real env return = -162.86


 14%|█████▏                                | 271/2000 [22:25<2:25:50,  5.06s/it]

Episode 271/2000, real env return = -234.82


 14%|█████▎                                | 281/2000 [23:16<2:26:44,  5.12s/it]

Episode 281/2000, real env return = -234.25


 15%|█████▌                                | 291/2000 [24:08<2:27:02,  5.16s/it]

Episode 291/2000, real env return = -223.55


 15%|█████▋                                | 301/2000 [24:59<2:25:16,  5.13s/it]

Episode 301/2000, real env return = -316.59


 16%|█████▉                                | 311/2000 [25:50<2:23:15,  5.09s/it]

Episode 311/2000, real env return = -210.50


 16%|██████                                | 321/2000 [26:42<2:24:37,  5.17s/it]

Episode 321/2000, real env return = -158.16


 17%|██████▎                               | 331/2000 [27:33<2:21:25,  5.08s/it]

Episode 331/2000, real env return = -152.74


 17%|██████▍                               | 341/2000 [28:24<2:20:58,  5.10s/it]

Episode 341/2000, real env return = -90.53


 18%|██████▋                               | 351/2000 [29:15<2:19:57,  5.09s/it]

Episode 351/2000, real env return = -189.89


 18%|██████▊                               | 361/2000 [30:07<2:19:40,  5.11s/it]

Episode 361/2000, real env return = -177.80


 19%|███████                               | 371/2000 [30:58<2:19:52,  5.15s/it]

Episode 371/2000, real env return = -145.24


 19%|███████▏                              | 381/2000 [31:49<2:18:19,  5.13s/it]

Episode 381/2000, real env return = -166.69


 20%|███████▍                              | 391/2000 [32:40<2:18:00,  5.15s/it]

Episode 391/2000, real env return = -174.23


 20%|███████▌                              | 401/2000 [33:32<2:17:20,  5.15s/it]

Episode 401/2000, real env return = -209.38


 21%|███████▊                              | 411/2000 [34:23<2:15:05,  5.10s/it]

Episode 411/2000, real env return = -203.06


 21%|███████▉                              | 421/2000 [35:14<2:14:14,  5.10s/it]

Episode 421/2000, real env return = -191.41


 22%|████████▏                             | 431/2000 [36:05<2:15:55,  5.20s/it]

Episode 431/2000, real env return = -163.98


 22%|████████▍                             | 441/2000 [36:56<2:13:36,  5.14s/it]

Episode 441/2000, real env return = -156.79


 23%|████████▌                             | 451/2000 [37:48<2:11:33,  5.10s/it]

Episode 451/2000, real env return = -175.72


 23%|████████▊                             | 461/2000 [38:39<2:13:16,  5.20s/it]

Episode 461/2000, real env return = -181.72


 24%|████████▉                             | 471/2000 [39:31<2:11:06,  5.14s/it]

Episode 471/2000, real env return = -181.39


 24%|█████████▏                            | 481/2000 [40:22<2:09:40,  5.12s/it]

Episode 481/2000, real env return = -129.22


 25%|█████████▎                            | 491/2000 [41:14<2:10:00,  5.17s/it]

Episode 491/2000, real env return = -163.34


 25%|█████████▌                            | 501/2000 [42:05<2:08:08,  5.13s/it]

Episode 501/2000, real env return = -115.64


 26%|█████████▋                            | 511/2000 [42:57<2:08:12,  5.17s/it]

Episode 511/2000, real env return = -154.27


 26%|█████████▉                            | 521/2000 [43:48<2:07:48,  5.18s/it]

Episode 521/2000, real env return = -95.24


 27%|██████████                            | 531/2000 [44:40<2:07:09,  5.19s/it]

Episode 531/2000, real env return = -34.77


 27%|██████████▎                           | 541/2000 [45:31<2:04:23,  5.12s/it]

Episode 541/2000, real env return = -105.19


 28%|██████████▍                           | 551/2000 [46:22<2:03:09,  5.10s/it]

Episode 551/2000, real env return = -99.44


 28%|██████████▋                           | 561/2000 [47:15<2:07:51,  5.33s/it]

Episode 561/2000, real env return = -77.12


 29%|██████████▊                           | 571/2000 [48:07<2:03:06,  5.17s/it]

Episode 571/2000, real env return = -64.84


 29%|███████████                           | 581/2000 [48:58<2:02:09,  5.16s/it]

Episode 581/2000, real env return = -102.09


 30%|███████████▏                          | 591/2000 [49:49<1:59:38,  5.09s/it]

Episode 591/2000, real env return = -82.34


 30%|███████████▍                          | 601/2000 [50:41<2:00:23,  5.16s/it]

Episode 601/2000, real env return = -89.40


 31%|███████████▌                          | 611/2000 [51:33<2:00:07,  5.19s/it]

Episode 611/2000, real env return = -100.60


 31%|███████████▊                          | 621/2000 [52:24<1:57:37,  5.12s/it]

Episode 621/2000, real env return = -95.81


 32%|███████████▉                          | 631/2000 [53:15<1:56:54,  5.12s/it]

Episode 631/2000, real env return = -118.85


 32%|████████████▏                         | 641/2000 [54:06<1:55:42,  5.11s/it]

Episode 641/2000, real env return = -111.81


 33%|████████████▎                         | 651/2000 [54:58<1:57:36,  5.23s/it]

Episode 651/2000, real env return = -91.10


 33%|████████████▌                         | 661/2000 [55:49<1:54:51,  5.15s/it]

Episode 661/2000, real env return = -115.91


 34%|████████████▋                         | 671/2000 [56:40<1:53:36,  5.13s/it]

Episode 671/2000, real env return = -122.13


 34%|████████████▉                         | 681/2000 [57:31<1:52:07,  5.10s/it]

Episode 681/2000, real env return = -115.73


 35%|█████████████▏                        | 691/2000 [58:22<1:51:30,  5.11s/it]

Episode 691/2000, real env return = -115.13


 35%|█████████████▎                        | 701/2000 [59:13<1:49:33,  5.06s/it]

Episode 701/2000, real env return = -116.29


 36%|████████████▊                       | 711/2000 [1:00:05<1:49:55,  5.12s/it]

Episode 711/2000, real env return = -116.49


 36%|████████████▉                       | 721/2000 [1:00:56<1:49:43,  5.15s/it]

Episode 721/2000, real env return = -103.25


 37%|█████████████▏                      | 731/2000 [1:01:47<1:48:43,  5.14s/it]

Episode 731/2000, real env return = -110.80


 37%|█████████████▎                      | 741/2000 [1:02:39<1:48:05,  5.15s/it]

Episode 741/2000, real env return = -113.43


 38%|█████████████▌                      | 751/2000 [1:03:30<1:46:58,  5.14s/it]

Episode 751/2000, real env return = -107.60


 38%|█████████████▋                      | 761/2000 [1:04:22<1:46:47,  5.17s/it]

Episode 761/2000, real env return = -109.88


 39%|█████████████▉                      | 771/2000 [1:05:13<1:45:36,  5.16s/it]

Episode 771/2000, real env return = -103.71


 39%|██████████████                      | 781/2000 [1:06:04<1:44:23,  5.14s/it]

Episode 781/2000, real env return = -117.87


 40%|██████████████▏                     | 791/2000 [1:06:56<1:43:15,  5.12s/it]

Episode 791/2000, real env return = -109.99


 40%|██████████████▍                     | 801/2000 [1:07:47<1:42:04,  5.11s/it]

Episode 801/2000, real env return = -111.93


 41%|██████████████▌                     | 811/2000 [1:08:39<1:42:45,  5.19s/it]

Episode 811/2000, real env return = -117.97


 41%|██████████████▊                     | 821/2000 [1:09:30<1:40:05,  5.09s/it]

Episode 821/2000, real env return = -97.89


 42%|██████████████▉                     | 831/2000 [1:10:21<1:39:37,  5.11s/it]

Episode 831/2000, real env return = -99.11


 42%|███████████████▏                    | 841/2000 [1:11:13<1:39:08,  5.13s/it]

Episode 841/2000, real env return = -90.34


 43%|███████████████▎                    | 851/2000 [1:12:04<1:38:22,  5.14s/it]

Episode 851/2000, real env return = -102.72


 43%|███████████████▍                    | 861/2000 [1:12:56<1:38:21,  5.18s/it]

Episode 861/2000, real env return = -99.15


 44%|███████████████▋                    | 871/2000 [1:13:47<1:36:46,  5.14s/it]

Episode 871/2000, real env return = -106.99


 44%|███████████████▊                    | 881/2000 [1:14:38<1:35:02,  5.10s/it]

Episode 881/2000, real env return = -94.12


 45%|████████████████                    | 891/2000 [1:15:29<1:35:50,  5.19s/it]

Episode 891/2000, real env return = -126.32


 45%|████████████████▏                   | 901/2000 [1:16:21<1:34:15,  5.15s/it]

Episode 901/2000, real env return = -132.59


 46%|████████████████▍                   | 911/2000 [1:17:13<1:34:11,  5.19s/it]

Episode 911/2000, real env return = -100.94


 46%|████████████████▌                   | 921/2000 [1:18:04<1:31:41,  5.10s/it]

Episode 921/2000, real env return = -80.50


 47%|████████████████▊                   | 931/2000 [1:18:55<1:30:44,  5.09s/it]

Episode 931/2000, real env return = -92.22


 47%|████████████████▉                   | 941/2000 [1:19:46<1:30:31,  5.13s/it]

Episode 941/2000, real env return = -45.82


 48%|█████████████████                   | 951/2000 [1:20:37<1:29:10,  5.10s/it]

Episode 951/2000, real env return = -65.61


 48%|█████████████████▎                  | 961/2000 [1:21:29<1:29:25,  5.16s/it]

Episode 961/2000, real env return = 61.33


 49%|█████████████████▍                  | 971/2000 [1:22:20<1:28:47,  5.18s/it]

Episode 971/2000, real env return = -131.34


 49%|█████████████████▋                  | 981/2000 [1:23:11<1:27:35,  5.16s/it]

Episode 981/2000, real env return = 10.29


 50%|█████████████████▊                  | 991/2000 [1:24:02<1:25:56,  5.11s/it]

Episode 991/2000, real env return = 22.93


 50%|█████████████████▌                 | 1001/2000 [1:24:54<1:25:06,  5.11s/it]

Episode 1001/2000, real env return = 56.15


 51%|█████████████████▋                 | 1011/2000 [1:25:45<1:24:09,  5.11s/it]

Episode 1011/2000, real env return = 201.50


 51%|█████████████████▊                 | 1021/2000 [1:26:36<1:22:45,  5.07s/it]

Episode 1021/2000, real env return = -215.68


 52%|██████████████████                 | 1031/2000 [1:27:28<1:22:21,  5.10s/it]

Episode 1031/2000, real env return = 1090.82


 52%|██████████████████▏                | 1041/2000 [1:28:19<1:21:45,  5.12s/it]

Episode 1041/2000, real env return = -283.45


 53%|██████████████████▍                | 1051/2000 [1:29:10<1:21:25,  5.15s/it]

Episode 1051/2000, real env return = 260.42


 53%|██████████████████▌                | 1061/2000 [1:30:03<1:21:56,  5.24s/it]

Episode 1061/2000, real env return = -301.65


 54%|██████████████████▋                | 1071/2000 [1:30:54<1:19:21,  5.13s/it]

Episode 1071/2000, real env return = 273.20


 54%|██████████████████▉                | 1081/2000 [1:31:46<1:19:14,  5.17s/it]

Episode 1081/2000, real env return = -321.33


 55%|███████████████████                | 1091/2000 [1:32:37<1:18:42,  5.20s/it]

Episode 1091/2000, real env return = 915.94


 55%|███████████████████▎               | 1101/2000 [1:33:29<1:16:36,  5.11s/it]

Episode 1101/2000, real env return = 1007.71


 56%|███████████████████▍               | 1111/2000 [1:34:22<1:17:26,  5.23s/it]

Episode 1111/2000, real env return = 1343.34


 56%|███████████████████▌               | 1121/2000 [1:35:13<1:14:59,  5.12s/it]

Episode 1121/2000, real env return = 1296.11


 57%|███████████████████▊               | 1131/2000 [1:36:04<1:14:08,  5.12s/it]

Episode 1131/2000, real env return = 1321.88


 57%|███████████████████▉               | 1141/2000 [1:36:55<1:12:55,  5.09s/it]

Episode 1141/2000, real env return = 1423.88


 58%|████████████████████▏              | 1151/2000 [1:37:46<1:12:38,  5.13s/it]

Episode 1151/2000, real env return = 1434.62


 58%|████████████████████▎              | 1161/2000 [1:38:38<1:11:38,  5.12s/it]

Episode 1161/2000, real env return = 1408.00


 59%|████████████████████▍              | 1171/2000 [1:39:29<1:10:56,  5.13s/it]

Episode 1171/2000, real env return = 1151.88


 59%|████████████████████▋              | 1181/2000 [1:40:20<1:09:55,  5.12s/it]

Episode 1181/2000, real env return = 1499.15


 60%|████████████████████▊              | 1191/2000 [1:41:11<1:08:42,  5.10s/it]

Episode 1191/2000, real env return = 1472.78


 60%|█████████████████████              | 1201/2000 [1:42:02<1:08:11,  5.12s/it]

Episode 1201/2000, real env return = 1551.80


 61%|█████████████████████▏             | 1211/2000 [1:42:53<1:07:07,  5.10s/it]

Episode 1211/2000, real env return = 1545.09


 61%|█████████████████████▎             | 1221/2000 [1:43:45<1:06:45,  5.14s/it]

Episode 1221/2000, real env return = 789.80


 62%|█████████████████████▌             | 1231/2000 [1:44:36<1:04:58,  5.07s/it]

Episode 1231/2000, real env return = 1481.96


 62%|█████████████████████▋             | 1241/2000 [1:45:27<1:04:47,  5.12s/it]

Episode 1241/2000, real env return = 1669.48


 63%|█████████████████████▉             | 1251/2000 [1:46:18<1:03:54,  5.12s/it]

Episode 1251/2000, real env return = 1556.69


 63%|██████████████████████             | 1261/2000 [1:47:10<1:03:18,  5.14s/it]

Episode 1261/2000, real env return = 1714.15


 64%|██████████████████████▏            | 1271/2000 [1:48:01<1:02:06,  5.11s/it]

Episode 1271/2000, real env return = 1692.20


 64%|██████████████████████▍            | 1281/2000 [1:48:52<1:01:41,  5.15s/it]

Episode 1281/2000, real env return = 1633.76


 65%|██████████████████████▌            | 1291/2000 [1:49:43<1:00:52,  5.15s/it]

Episode 1291/2000, real env return = 1668.54


 65%|████████████████████████             | 1301/2000 [1:50:34<59:07,  5.07s/it]

Episode 1301/2000, real env return = 1694.55


 66%|████████████████████████▎            | 1311/2000 [1:51:25<58:07,  5.06s/it]

Episode 1311/2000, real env return = 1847.97


 66%|████████████████████████▍            | 1321/2000 [1:52:16<57:12,  5.06s/it]

Episode 1321/2000, real env return = 1836.68


 67%|████████████████████████▌            | 1331/2000 [1:53:07<56:59,  5.11s/it]

Episode 1331/2000, real env return = 1826.29


 67%|████████████████████████▊            | 1341/2000 [1:53:58<56:06,  5.11s/it]

Episode 1341/2000, real env return = 1852.08


 68%|████████████████████████▉            | 1351/2000 [1:54:49<55:33,  5.14s/it]

Episode 1351/2000, real env return = 1880.03


 68%|█████████████████████████▏           | 1361/2000 [1:55:40<55:02,  5.17s/it]

Episode 1361/2000, real env return = 1877.28


 69%|█████████████████████████▎           | 1371/2000 [1:56:31<53:32,  5.11s/it]

Episode 1371/2000, real env return = 1749.32


 69%|█████████████████████████▌           | 1381/2000 [1:57:22<52:36,  5.10s/it]

Episode 1381/2000, real env return = 1883.98


 70%|█████████████████████████▋           | 1391/2000 [1:58:13<51:25,  5.07s/it]

Episode 1391/2000, real env return = 1689.50


 70%|█████████████████████████▉           | 1401/2000 [1:59:04<50:26,  5.05s/it]

Episode 1401/2000, real env return = 1677.22


 71%|██████████████████████████           | 1411/2000 [1:59:55<50:04,  5.10s/it]

Episode 1411/2000, real env return = 1893.33


 71%|██████████████████████████▎          | 1421/2000 [2:00:46<49:33,  5.13s/it]

Episode 1421/2000, real env return = 1841.99


 72%|██████████████████████████▍          | 1431/2000 [2:01:38<48:40,  5.13s/it]

Episode 1431/2000, real env return = 1835.68


 72%|██████████████████████████▋          | 1441/2000 [2:02:29<47:59,  5.15s/it]

Episode 1441/2000, real env return = 1810.46


 73%|██████████████████████████▊          | 1451/2000 [2:03:20<46:31,  5.08s/it]

Episode 1451/2000, real env return = 1819.34


 73%|███████████████████████████          | 1461/2000 [2:04:11<45:50,  5.10s/it]

Episode 1461/2000, real env return = 1778.80


 74%|███████████████████████████▏         | 1471/2000 [2:05:02<45:06,  5.12s/it]

Episode 1471/2000, real env return = 1969.56


 74%|███████████████████████████▍         | 1481/2000 [2:05:53<43:58,  5.08s/it]

Episode 1481/2000, real env return = 1911.45


 75%|███████████████████████████▌         | 1491/2000 [2:06:45<43:19,  5.11s/it]

Episode 1491/2000, real env return = 1799.30


 75%|███████████████████████████▊         | 1501/2000 [2:07:36<42:35,  5.12s/it]

Episode 1501/2000, real env return = 1921.05


 76%|███████████████████████████▉         | 1511/2000 [2:08:28<43:21,  5.32s/it]

Episode 1511/2000, real env return = 1975.01


 76%|████████████████████████████▏        | 1521/2000 [2:09:20<41:16,  5.17s/it]

Episode 1521/2000, real env return = 1981.05


 77%|████████████████████████████▎        | 1531/2000 [2:10:11<40:21,  5.16s/it]

Episode 1531/2000, real env return = 1957.05


 77%|████████████████████████████▌        | 1541/2000 [2:11:02<39:21,  5.15s/it]

Episode 1541/2000, real env return = 1894.74


 78%|████████████████████████████▋        | 1551/2000 [2:11:53<38:01,  5.08s/it]

Episode 1551/2000, real env return = 1884.34


 78%|████████████████████████████▉        | 1561/2000 [2:12:44<37:03,  5.06s/it]

Episode 1561/2000, real env return = 1890.67


 79%|█████████████████████████████        | 1571/2000 [2:13:35<36:41,  5.13s/it]

Episode 1571/2000, real env return = 1828.70


 79%|█████████████████████████████▏       | 1581/2000 [2:14:26<35:31,  5.09s/it]

Episode 1581/2000, real env return = 1934.36


 80%|█████████████████████████████▍       | 1591/2000 [2:15:17<34:49,  5.11s/it]

Episode 1591/2000, real env return = 1987.14


 80%|█████████████████████████████▌       | 1601/2000 [2:16:09<34:05,  5.13s/it]

Episode 1601/2000, real env return = 1942.65


 81%|█████████████████████████████▊       | 1611/2000 [2:17:00<33:21,  5.15s/it]

Episode 1611/2000, real env return = 1953.49


 81%|█████████████████████████████▉       | 1621/2000 [2:17:51<32:36,  5.16s/it]

Episode 1621/2000, real env return = 1964.67


 82%|██████████████████████████████▏      | 1631/2000 [2:18:42<31:22,  5.10s/it]

Episode 1631/2000, real env return = 1775.93


 82%|██████████████████████████████▎      | 1641/2000 [2:19:34<30:36,  5.12s/it]

Episode 1641/2000, real env return = 1858.12


 83%|██████████████████████████████▌      | 1651/2000 [2:20:25<29:36,  5.09s/it]

Episode 1651/2000, real env return = 1872.87


 83%|██████████████████████████████▋      | 1661/2000 [2:21:16<28:40,  5.08s/it]

Episode 1661/2000, real env return = 1924.81


 84%|██████████████████████████████▉      | 1671/2000 [2:22:07<28:00,  5.11s/it]

Episode 1671/2000, real env return = 1864.58


 84%|███████████████████████████████      | 1681/2000 [2:22:59<27:17,  5.13s/it]

Episode 1681/2000, real env return = 1849.48


 85%|███████████████████████████████▎     | 1691/2000 [2:23:50<26:33,  5.16s/it]

Episode 1691/2000, real env return = 1794.14


 85%|███████████████████████████████▍     | 1701/2000 [2:24:41<25:33,  5.13s/it]

Episode 1701/2000, real env return = 1912.78


 86%|███████████████████████████████▋     | 1711/2000 [2:25:33<24:54,  5.17s/it]

Episode 1711/2000, real env return = 1926.80


 86%|███████████████████████████████▊     | 1721/2000 [2:26:24<23:56,  5.15s/it]

Episode 1721/2000, real env return = 1963.48


 87%|████████████████████████████████     | 1731/2000 [2:27:16<23:16,  5.19s/it]

Episode 1731/2000, real env return = 1905.21


 87%|████████████████████████████████▏    | 1741/2000 [2:28:07<22:10,  5.14s/it]

Episode 1741/2000, real env return = 1882.13


 88%|████████████████████████████████▍    | 1751/2000 [2:28:58<21:02,  5.07s/it]

Episode 1751/2000, real env return = 2088.19


 88%|████████████████████████████████▌    | 1761/2000 [2:29:49<20:12,  5.07s/it]

Episode 1761/2000, real env return = 1823.48


 89%|████████████████████████████████▊    | 1771/2000 [2:30:40<19:29,  5.11s/it]

Episode 1771/2000, real env return = 1948.00


 89%|████████████████████████████████▉    | 1781/2000 [2:31:31<18:42,  5.13s/it]

Episode 1781/2000, real env return = 2028.17


 90%|█████████████████████████████████▏   | 1791/2000 [2:32:22<17:53,  5.14s/it]

Episode 1791/2000, real env return = 2047.97


 90%|█████████████████████████████████▎   | 1801/2000 [2:33:13<16:58,  5.12s/it]

Episode 1801/2000, real env return = 1837.19


 91%|█████████████████████████████████▌   | 1811/2000 [2:34:04<16:04,  5.10s/it]

Episode 1811/2000, real env return = 1962.69


 91%|█████████████████████████████████▋   | 1821/2000 [2:34:56<15:12,  5.10s/it]

Episode 1821/2000, real env return = 2067.37


 92%|█████████████████████████████████▊   | 1831/2000 [2:35:47<14:15,  5.06s/it]

Episode 1831/2000, real env return = 2017.14


 92%|██████████████████████████████████   | 1841/2000 [2:36:38<13:36,  5.13s/it]

Episode 1841/2000, real env return = 2022.35


 93%|██████████████████████████████████▏  | 1851/2000 [2:37:29<12:40,  5.11s/it]

Episode 1851/2000, real env return = 1945.55


 93%|██████████████████████████████████▍  | 1861/2000 [2:38:21<11:53,  5.13s/it]

Episode 1861/2000, real env return = 2017.82


 94%|██████████████████████████████████▌  | 1871/2000 [2:39:12<11:10,  5.20s/it]

Episode 1871/2000, real env return = 1886.51


 94%|██████████████████████████████████▊  | 1881/2000 [2:40:03<10:16,  5.18s/it]

Episode 1881/2000, real env return = 2017.00


 95%|██████████████████████████████████▉  | 1891/2000 [2:40:55<09:18,  5.12s/it]

Episode 1891/2000, real env return = 1960.55


 95%|███████████████████████████████████▏ | 1901/2000 [2:41:46<08:26,  5.12s/it]

Episode 1901/2000, real env return = 1985.93


 96%|███████████████████████████████████▎ | 1911/2000 [2:42:37<07:34,  5.11s/it]

Episode 1911/2000, real env return = 2112.36


 96%|███████████████████████████████████▌ | 1921/2000 [2:43:28<06:42,  5.10s/it]

Episode 1921/2000, real env return = 2006.42


 97%|███████████████████████████████████▋ | 1931/2000 [2:44:20<05:53,  5.13s/it]

Episode 1931/2000, real env return = 2031.25


 97%|███████████████████████████████████▉ | 1941/2000 [2:45:11<05:01,  5.10s/it]

Episode 1941/2000, real env return = 2041.13


 98%|████████████████████████████████████ | 1951/2000 [2:46:02<04:11,  5.14s/it]

Episode 1951/2000, real env return = 2088.79


 98%|████████████████████████████████████▎| 1961/2000 [2:46:54<03:21,  5.17s/it]

Episode 1961/2000, real env return = 2053.00


 99%|████████████████████████████████████▍| 1971/2000 [2:47:45<02:29,  5.17s/it]

Episode 1971/2000, real env return = 2083.42


 99%|████████████████████████████████████▋| 1981/2000 [2:48:36<01:37,  5.13s/it]

Episode 1981/2000, real env return = 2128.80


100%|████████████████████████████████████▊| 1991/2000 [2:49:27<00:45,  5.08s/it]

Episode 1991/2000, real env return = 1990.43


100%|█████████████████████████████████████| 2000/2000 [2:50:13<00:00,  5.11s/it]


Training finished.


In [12]:
np.save('rand_halfcheetah_train5.npy', train5)