In [1]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

from stable_baselines3 import SAC
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.logger import configure

# ========= 1. Environment Wrapper =========
class EpisodicRewardWrapper(gym.RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.cumulative_reward = 0.0

    def step(self, action):
        result = self.env.step(action)
        if len(result) == 5:
            obs, reward, terminated, truncated, info = result
            done = terminated or truncated
        else:
            obs, reward, done, info = result

        # 记录累计的reward
        self.cumulative_reward += reward
        
        if done:
            reward = self.cumulative_reward
            self.cumulative_reward = 0.0
        else:
            reward = 0.0
        
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.cumulative_reward = 0.0
        result = self.env.reset(**kwargs)
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        return obs




# ========= 2. 轨迹存储 =========
class TrajectoryReplay:
    def __init__(self):
        self.trajectories = []

    def add_trajectory(self, traj):
        self.trajectories.append(traj)

    def sample(self, batch_size):
        indices = np.random.randint(0, len(self.trajectories), size=batch_size)
        return [self.trajectories[i] for i in indices]

    def __len__(self):
        return len(self.trajectories)



# ========= 3. Reward Proxy ==========
class RewardModel(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_size=256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim + action_dim, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
        )
        self.mu_layer = nn.Linear(hidden_size, 1)
        self.log_sigma_layer = nn.Linear(hidden_size, 1)

    def forward(self, s, a):
        x = torch.cat([s, a], dim=-1)
        x = self.net(x)
        mu = self.mu_layer(x)
        log_sigma = self.log_sigma_layer(x)
        sigma = torch.clamp(torch.exp(log_sigma), min=1e-3, max=2.0)
        return mu, sigma


# ========= 4. Train a reward model with batch trajectories ==========
def train_reward_model_gaussian_loo(reward_model, optimizer, trajectories, device='cpu'):
    """
    使用留一法训练奖励模型：
    对于每条轨迹 τ = { (s₀, a₀), ..., (s_{T-1}, a_{T-1}) }，
      - 真实轨迹奖励 R_ep = ∑ₜ rₜ（假设中间步奖励为0，只有最后一步累计）
      - 对每个时间步 i：
            * 得到每个 (s_t, a_t) 的预测参数 μ_t 和 σ_t；
            * 对于 t ≠ i，单独采样噪声 ε_{i,t} ~ N(0,1)，计算采样奖励：
                  r(s_t,a_t) = μ_t + σ_t * ε_{i,t}
            * 计算目标值 target_i = R_ep - ∑_{t ≠ i} r(s_t,a_t)
            * 计算该时间步的负对数似然：
                  nll_i = log(σ_i) + ((target_i - μ_i)²) / (2σ_i²)
    最后对每条轨迹所有时间步的 nll 取平均，再对所有轨迹取平均作为最终损失。
    """
    reward_model.train()
    total_loss = 0.0
    total_episodes = 0

    for traj in trajectories:
        # 真实轨迹奖励（假设中间步奖励为0，只有最后一步累计）
        R_ep = sum([t[2] for t in traj])
        Tj = len(traj)
        if Tj == 0:
            continue

        # 收集状态和动作
        s_list, a_list = [], []
        for (s, a, r, s_next, done) in traj:
            s_list.append(s)
            a_list.append(a)
        states = torch.tensor(s_list, dtype=torch.float, device=device)
        actions = torch.tensor(a_list, dtype=torch.float, device=device)
        
        # 预测整个轨迹每个 (s,a) 的参数，输出 shape 为 [Tj, 1]
        mu, sigma = reward_model(states, actions)
        
        nll_list = []
        for i in range(Tj):
            # 构造一个 mask，排除第 i 个时间步
            mask = torch.ones(Tj, dtype=torch.bool, device=device)
            mask[i] = False
            
            # 对于每个 t ≠ i，单独采样噪声 ε_{i,t} ~ N(0,1)
            epsilons = torch.randn((mask.sum(), 1), device=device)
            
            # 计算 t ≠ i 的采样奖励： r = μ + σ * ε_{i,t}
            r_samples = mu[mask] + epsilons * sigma[mask]
            leave_one_sum = r_samples.sum()  # 除去第 i 步的总奖励预测
            
            # 第 i 步的目标：R_ep - (其它时间步的采样奖励之和)
            target_i = R_ep - leave_one_sum
            
            # 计算第 i 步的负对数似然损失
            nll_i = torch.log(sigma[i]) + ((target_i - mu[i])**2) / (2 * sigma[i]**2)
            nll_list.append(nll_i)
        
        loss_traj = torch.stack(nll_list).mean()  # 对单条轨迹所有时刻取平均
        total_loss += loss_traj
        total_episodes += 1

    if total_episodes > 0:
        loss_mean = total_loss / total_episodes
    else:
        loss_mean = torch.tensor(0.0, device=device)

    optimizer.zero_grad()
    loss_mean.backward()
    optimizer.step()
    return loss_mean.item()








# ========= 5. How to collect a trajectory ==========
def collect_episodes(env, model, n_episodes, device='cpu'):
    trajectories = []
    for _ in range(n_episodes):
        result = env.reset()
        if isinstance(result, tuple):
            obs, _ = result
        else:
            obs = result
        done = False
        traj = []
        while not done:
            if obs is None or (hasattr(obs, '__len__') and len(obs) == 0):
                raise ValueError("Observation is empty, check env.reset() output")
            obs_tensor = torch.tensor(obs, dtype=torch.float, device=device).unsqueeze(0)
            with torch.no_grad():
                action, _states = model.predict(obs_tensor.cpu().numpy(), deterministic=False)
            result = env.step(action[0])
            
            if len(result) == 5:
                next_obs, reward, terminated, truncated, info = result
                done = terminated or truncated
            
            else:
                next_obs, reward, done, info = result
            traj.append((obs, action[0], reward, next_obs, done))
            obs = next_obs
        trajectories.append(traj)
    return trajectories


# ========= 6. 将新的reward模型生成的代理奖励存入SAC的ReplayBuffer ==========
def add_shaped_transitions_to_replay(model, reward_model, trajectories, device='cpu'):
    for traj in trajectories:
        states = []
        actions = []
        next_states = []
        dones = []
        for (s, a, r, s_next, d) in traj:
            states.append(s)
            actions.append(a)
            next_states.append(s_next)
            dones.append(d)
        
        s_tensor = torch.tensor(states, dtype=torch.float, device=device)
        a_tensor = torch.tensor(actions, dtype=torch.float, device=device)
        
        with torch.no_grad():
            mu, _ = reward_model(s_tensor, a_tensor)  # 这里我们用 μ 作为代理奖励
        shaped_rewards = mu.cpu().numpy().squeeze(-1)
        
        for i in range(len(traj)):
            s, a, _, s_next, d = traj[i]
            r_shaped = shaped_rewards[i]
            model.replay_buffer.add(
                s, s_next, a, r_shaped, d, infos=[{}],
            )

# ========= 7. 主函数 ==========
def main():
    class Args:
        env = "HalfCheetah-v4"
        episodes = 2000
        steps_per_update = 1000
        # 使用GPU
        device = "cuda"
    
    args = Args()
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    # 创建环境（注意环境包装仍然不改变总reward的逻辑）
    base_env = gym.make(args.env)
    env = EpisodicRewardWrapper(base_env)
    vec_env = DummyVecEnv([lambda: env])

    # 创建SAC模型（注意这里SAC的训练依赖于ReplayBuffer，我们将手动往buffer中填入代理奖励）
    model = SAC(
        policy="MlpPolicy",
        env=vec_env,
        verbose=1,
        seed=42,
        buffer_size=100000,
        learning_starts=0,
        train_freq=1,
        gradient_steps=0,
        batch_size=256,
        gamma=0.99,
        device=device
    )
    # 配置logger
    from stable_baselines3.common.logger import configure
    model._logger = configure(folder=None, format_strings=["stdout"])

    # 创建奖励模型（现在输出μ和σ）
    state_dim = base_env.observation_space.shape[0]
    action_dim = base_env.action_space.shape[0]
    reward_model = RewardModel(state_dim, action_dim, hidden_size=256).to(device)
    reward_optimizer = optim.Adam(reward_model.parameters(), lr=3e-4)

    traj_replay = TrajectoryReplay()

    ep_record = []
    for ep in tqdm(range(args.episodes)):
        # (a) 收集轨迹
        new_trajectories = collect_episodes(env, model, n_episodes=1, device=device)
        for traj in new_trajectories:
            traj_replay.add_trajectory(traj)
        
        # (b) 当有足够轨迹后，用Gaussian对数似然训练奖励模型
        if len(traj_replay) >= 8:
            batch_size_trajectories = 8
            sampled_trajs = traj_replay.sample(batch_size_trajectories)
            loss_val = train_reward_model_gaussian_loo(
                reward_model, reward_optimizer, sampled_trajs, device=device
            )
        
        # (c) 将新轨迹用更新后的奖励模型生成的代理奖励存入SAC的ReplayBuffer
        add_shaped_transitions_to_replay(model, reward_model, new_trajectories, device=device)
        
        # (d) 用SAC更新策略
        if len(traj_replay) >= 8:
            model.train(args.steps_per_update)
        
        # (e) 简单评估：输出最新轨迹的真实环境回报
        ep_return = sum([t[2] for t in new_trajectories[-1]])
        ep_record.append(ep_return)
        if ep % 10 == 0:
            print(f"Episode {ep+1}/{args.episodes}, real env return = {ep_return:.2f}")

    print("Training finished.")
    return ep_record

In [2]:
train1 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<11:41,  2.85it/s]

Episode 1/2000, real env return = -209.21


  1%|▏                                      | 11/2000 [00:36<4:11:22,  7.58s/it]

Episode 11/2000, real env return = -309.77


  1%|▍                                      | 21/2000 [02:46<7:01:17, 12.77s/it]

Episode 21/2000, real env return = -342.20


  2%|▌                                      | 31/2000 [04:56<7:04:17, 12.93s/it]

Episode 31/2000, real env return = -235.86


  2%|▊                                      | 41/2000 [07:05<7:04:34, 13.00s/it]

Episode 41/2000, real env return = -426.66


  3%|▉                                      | 51/2000 [09:14<6:59:20, 12.91s/it]

Episode 51/2000, real env return = -240.61


  3%|█▏                                     | 61/2000 [11:25<7:00:17, 13.01s/it]

Episode 61/2000, real env return = -76.93


  4%|█▍                                     | 71/2000 [13:35<6:59:37, 13.05s/it]

Episode 71/2000, real env return = -217.86


  4%|█▌                                     | 81/2000 [15:46<6:58:28, 13.08s/it]

Episode 81/2000, real env return = -204.96


  5%|█▊                                     | 91/2000 [17:55<6:50:50, 12.91s/it]

Episode 91/2000, real env return = -300.29


  5%|█▉                                    | 101/2000 [20:06<6:51:43, 13.01s/it]

Episode 101/2000, real env return = -99.75


  6%|██                                    | 111/2000 [22:16<6:50:07, 13.03s/it]

Episode 111/2000, real env return = -191.84


  6%|██▎                                   | 121/2000 [24:27<6:48:40, 13.05s/it]

Episode 121/2000, real env return = -268.57


  7%|██▍                                   | 131/2000 [26:37<6:45:12, 13.01s/it]

Episode 131/2000, real env return = -187.20


  7%|██▋                                   | 141/2000 [28:47<6:43:44, 13.03s/it]

Episode 141/2000, real env return = -249.15


  8%|██▊                                   | 151/2000 [30:57<6:40:43, 13.00s/it]

Episode 151/2000, real env return = -211.28


  8%|███                                   | 161/2000 [33:07<6:37:36, 12.97s/it]

Episode 161/2000, real env return = -219.81


  9%|███▏                                  | 171/2000 [35:17<6:38:36, 13.08s/it]

Episode 171/2000, real env return = -116.04


  9%|███▍                                  | 181/2000 [37:27<6:33:24, 12.98s/it]

Episode 181/2000, real env return = 758.07


 10%|███▋                                  | 191/2000 [39:38<6:32:35, 13.02s/it]

Episode 191/2000, real env return = 1088.48


 10%|███▊                                  | 201/2000 [41:48<6:30:32, 13.03s/it]

Episode 201/2000, real env return = -357.53


 11%|████                                  | 211/2000 [43:59<6:27:36, 13.00s/it]

Episode 211/2000, real env return = -250.98


 11%|████▏                                 | 221/2000 [46:09<6:26:55, 13.05s/it]

Episode 221/2000, real env return = 556.38


 12%|████▍                                 | 231/2000 [48:20<6:26:03, 13.09s/it]

Episode 231/2000, real env return = 1434.10


 12%|████▌                                 | 241/2000 [50:30<6:22:32, 13.05s/it]

Episode 241/2000, real env return = 1581.46


 13%|████▊                                 | 251/2000 [52:41<6:21:03, 13.07s/it]

Episode 251/2000, real env return = 1752.18


 13%|████▉                                 | 261/2000 [54:51<6:17:28, 13.02s/it]

Episode 261/2000, real env return = 2033.85


 14%|█████▏                                | 271/2000 [57:02<6:17:03, 13.08s/it]

Episode 271/2000, real env return = 2182.01


 14%|█████▎                                | 281/2000 [59:12<6:12:25, 13.00s/it]

Episode 281/2000, real env return = 2679.97


 15%|█████▏                              | 291/2000 [1:01:22<6:12:57, 13.09s/it]

Episode 291/2000, real env return = 2839.99


 15%|█████▍                              | 301/2000 [1:03:33<6:08:58, 13.03s/it]

Episode 301/2000, real env return = 2855.78


 16%|█████▌                              | 311/2000 [1:05:43<6:07:45, 13.06s/it]

Episode 311/2000, real env return = 3011.59


 16%|█████▊                              | 321/2000 [1:07:54<6:05:42, 13.07s/it]

Episode 321/2000, real env return = 3170.75


 17%|█████▉                              | 331/2000 [1:10:05<6:04:04, 13.09s/it]

Episode 331/2000, real env return = 3036.85


 17%|██████▏                             | 341/2000 [1:12:16<6:01:34, 13.08s/it]

Episode 341/2000, real env return = 3001.03


 18%|██████▎                             | 351/2000 [1:14:27<5:59:01, 13.06s/it]

Episode 351/2000, real env return = 3173.32


 18%|██████▍                             | 361/2000 [1:16:38<5:56:50, 13.06s/it]

Episode 361/2000, real env return = 3133.48


 19%|██████▋                             | 371/2000 [1:18:49<5:53:32, 13.02s/it]

Episode 371/2000, real env return = 3367.05


 19%|██████▊                             | 381/2000 [1:21:00<5:53:02, 13.08s/it]

Episode 381/2000, real env return = 3565.46


 20%|███████                             | 391/2000 [1:23:10<5:50:18, 13.06s/it]

Episode 391/2000, real env return = 3525.73


 20%|███████▏                            | 401/2000 [1:25:22<5:48:05, 13.06s/it]

Episode 401/2000, real env return = 3539.06


 21%|███████▍                            | 411/2000 [1:27:33<5:46:39, 13.09s/it]

Episode 411/2000, real env return = 3468.23


 21%|███████▌                            | 421/2000 [1:29:43<5:43:19, 13.05s/it]

Episode 421/2000, real env return = 3806.14


 22%|███████▊                            | 431/2000 [1:31:54<5:42:16, 13.09s/it]

Episode 431/2000, real env return = 3711.27


 22%|███████▉                            | 441/2000 [1:34:05<5:38:53, 13.04s/it]

Episode 441/2000, real env return = 3800.09


 23%|████████                            | 451/2000 [1:36:16<5:36:56, 13.05s/it]

Episode 451/2000, real env return = 3630.08


 23%|████████▎                           | 461/2000 [1:38:27<5:35:15, 13.07s/it]

Episode 461/2000, real env return = 3705.07


 24%|████████▍                           | 471/2000 [1:40:38<5:34:02, 13.11s/it]

Episode 471/2000, real env return = 3790.19


 24%|████████▋                           | 481/2000 [1:42:49<5:32:53, 13.15s/it]

Episode 481/2000, real env return = 4156.55


 25%|████████▊                           | 491/2000 [1:45:01<5:34:46, 13.31s/it]

Episode 491/2000, real env return = 3785.83


 25%|█████████                           | 501/2000 [1:47:13<5:28:51, 13.16s/it]

Episode 501/2000, real env return = 3657.02


 26%|█████████▏                          | 511/2000 [1:49:24<5:26:25, 13.15s/it]

Episode 511/2000, real env return = 3962.11


 26%|█████████▍                          | 521/2000 [1:51:36<5:23:25, 13.12s/it]

Episode 521/2000, real env return = 4036.33


 27%|█████████▌                          | 531/2000 [1:53:47<5:21:09, 13.12s/it]

Episode 531/2000, real env return = 4268.28


 27%|█████████▋                          | 541/2000 [1:55:58<5:19:50, 13.15s/it]

Episode 541/2000, real env return = 4188.59


 28%|█████████▉                          | 551/2000 [1:58:09<5:15:15, 13.05s/it]

Episode 551/2000, real env return = 4216.44


 28%|██████████                          | 561/2000 [2:00:20<5:13:19, 13.06s/it]

Episode 561/2000, real env return = 3908.38


 29%|██████████▎                         | 571/2000 [2:02:34<5:18:01, 13.35s/it]

Episode 571/2000, real env return = 4353.75


 29%|██████████▍                         | 581/2000 [2:04:45<5:10:18, 13.12s/it]

Episode 581/2000, real env return = 4197.53


 30%|██████████▋                         | 591/2000 [2:06:56<5:08:31, 13.14s/it]

Episode 591/2000, real env return = 4442.70


 30%|██████████▊                         | 601/2000 [2:09:08<5:05:52, 13.12s/it]

Episode 601/2000, real env return = 4128.68


 31%|██████████▉                         | 611/2000 [2:11:20<5:03:59, 13.13s/it]

Episode 611/2000, real env return = 4447.85


 31%|███████████▏                        | 621/2000 [2:13:31<5:00:10, 13.06s/it]

Episode 621/2000, real env return = 3950.61


 32%|███████████▎                        | 631/2000 [2:15:42<4:59:40, 13.13s/it]

Episode 631/2000, real env return = 4214.02


 32%|███████████▌                        | 641/2000 [2:17:54<4:57:19, 13.13s/it]

Episode 641/2000, real env return = 4189.83


 33%|███████████▋                        | 651/2000 [2:20:05<4:55:33, 13.15s/it]

Episode 651/2000, real env return = 4436.86


 33%|███████████▉                        | 661/2000 [2:22:16<4:53:14, 13.14s/it]

Episode 661/2000, real env return = 4053.28


 34%|████████████                        | 671/2000 [2:24:27<4:49:10, 13.06s/it]

Episode 671/2000, real env return = 4202.19


 34%|████████████▎                       | 681/2000 [2:26:38<4:48:32, 13.13s/it]

Episode 681/2000, real env return = 3879.91


 35%|████████████▍                       | 691/2000 [2:28:49<4:45:57, 13.11s/it]

Episode 691/2000, real env return = 4359.81


 35%|████████████▌                       | 701/2000 [2:31:00<4:43:51, 13.11s/it]

Episode 701/2000, real env return = 4366.16


 36%|████████████▊                       | 711/2000 [2:33:12<4:42:50, 13.17s/it]

Episode 711/2000, real env return = 4548.04


 36%|████████████▉                       | 721/2000 [2:35:23<4:40:10, 13.14s/it]

Episode 721/2000, real env return = 4559.25


 37%|█████████████▏                      | 731/2000 [2:37:35<4:37:33, 13.12s/it]

Episode 731/2000, real env return = 4066.76


 37%|█████████████▎                      | 741/2000 [2:39:47<4:37:16, 13.21s/it]

Episode 741/2000, real env return = 4811.85


 38%|█████████████▌                      | 751/2000 [2:41:58<4:32:50, 13.11s/it]

Episode 751/2000, real env return = 4190.78


 38%|█████████████▋                      | 761/2000 [2:44:09<4:31:02, 13.13s/it]

Episode 761/2000, real env return = 4625.12


 39%|█████████████▉                      | 771/2000 [2:46:20<4:28:54, 13.13s/it]

Episode 771/2000, real env return = 4525.35


 39%|██████████████                      | 781/2000 [2:48:31<4:26:18, 13.11s/it]

Episode 781/2000, real env return = 4774.95


 40%|██████████████▏                     | 791/2000 [2:50:42<4:23:50, 13.09s/it]

Episode 791/2000, real env return = 4836.20


 40%|██████████████▍                     | 801/2000 [2:52:01<2:09:20,  6.47s/it]

Episode 801/2000, real env return = 4374.56


 41%|██████████████▌                     | 811/2000 [2:52:59<1:55:13,  5.81s/it]

Episode 811/2000, real env return = 4948.42


 41%|██████████████▊                     | 821/2000 [2:53:56<1:53:23,  5.77s/it]

Episode 821/2000, real env return = 4877.50


 42%|██████████████▉                     | 831/2000 [2:54:55<1:55:24,  5.92s/it]

Episode 831/2000, real env return = 4867.59


 42%|███████████████▏                    | 841/2000 [2:55:53<1:51:57,  5.80s/it]

Episode 841/2000, real env return = 4651.45


 43%|███████████████▎                    | 851/2000 [2:56:52<1:51:27,  5.82s/it]

Episode 851/2000, real env return = 4616.44


 43%|███████████████▍                    | 861/2000 [2:57:49<1:49:45,  5.78s/it]

Episode 861/2000, real env return = 4849.34


 44%|███████████████▋                    | 871/2000 [2:58:48<1:50:40,  5.88s/it]

Episode 871/2000, real env return = 4786.12


 44%|███████████████▊                    | 881/2000 [2:59:46<1:49:51,  5.89s/it]

Episode 881/2000, real env return = 4923.76


 45%|████████████████                    | 891/2000 [3:00:43<1:46:52,  5.78s/it]

Episode 891/2000, real env return = 4718.43


 45%|████████████████▏                   | 901/2000 [3:01:41<1:44:24,  5.70s/it]

Episode 901/2000, real env return = 5098.67


 46%|████████████████▍                   | 911/2000 [3:02:38<1:44:16,  5.75s/it]

Episode 911/2000, real env return = 4896.80


 46%|████████████████▌                   | 921/2000 [3:03:36<1:42:33,  5.70s/it]

Episode 921/2000, real env return = 4902.77


 47%|████████████████▊                   | 931/2000 [3:04:33<1:42:42,  5.76s/it]

Episode 931/2000, real env return = 5070.88


 47%|████████████████▉                   | 941/2000 [3:05:31<1:42:25,  5.80s/it]

Episode 941/2000, real env return = 5081.46


 48%|█████████████████                   | 951/2000 [3:06:29<1:40:17,  5.74s/it]

Episode 951/2000, real env return = 5223.23


 48%|█████████████████▎                  | 961/2000 [3:07:26<1:39:55,  5.77s/it]

Episode 961/2000, real env return = 4928.53


 49%|█████████████████▍                  | 971/2000 [3:08:25<1:39:05,  5.78s/it]

Episode 971/2000, real env return = 5386.85


 49%|█████████████████▋                  | 981/2000 [3:09:22<1:38:21,  5.79s/it]

Episode 981/2000, real env return = 5000.61


 50%|█████████████████▊                  | 991/2000 [3:10:19<1:36:10,  5.72s/it]

Episode 991/2000, real env return = 4331.00


 50%|█████████████████▌                 | 1001/2000 [3:11:17<1:35:12,  5.72s/it]

Episode 1001/2000, real env return = 5496.16


 51%|█████████████████▋                 | 1011/2000 [3:12:14<1:34:07,  5.71s/it]

Episode 1011/2000, real env return = 5228.65


 51%|█████████████████▊                 | 1021/2000 [3:13:12<1:34:51,  5.81s/it]

Episode 1021/2000, real env return = 5500.30


 52%|██████████████████                 | 1031/2000 [3:14:09<1:32:59,  5.76s/it]

Episode 1031/2000, real env return = 5514.28


 52%|██████████████████▏                | 1041/2000 [3:15:07<1:33:45,  5.87s/it]

Episode 1041/2000, real env return = 5340.96


 53%|██████████████████▍                | 1051/2000 [3:16:05<1:32:02,  5.82s/it]

Episode 1051/2000, real env return = 4929.71


 53%|██████████████████▌                | 1061/2000 [3:17:04<1:30:35,  5.79s/it]

Episode 1061/2000, real env return = 5816.66


 54%|██████████████████▋                | 1071/2000 [3:18:01<1:30:39,  5.86s/it]

Episode 1071/2000, real env return = 5861.31


 54%|██████████████████▉                | 1081/2000 [3:19:00<1:29:26,  5.84s/it]

Episode 1081/2000, real env return = 6037.64


 55%|███████████████████                | 1091/2000 [3:19:58<1:28:13,  5.82s/it]

Episode 1091/2000, real env return = 5488.83


 55%|███████████████████▎               | 1101/2000 [3:20:56<1:26:45,  5.79s/it]

Episode 1101/2000, real env return = 5474.35


 56%|███████████████████▍               | 1111/2000 [3:21:54<1:25:50,  5.79s/it]

Episode 1111/2000, real env return = 5827.66


 56%|███████████████████▌               | 1121/2000 [3:22:52<1:26:09,  5.88s/it]

Episode 1121/2000, real env return = 5372.35


 57%|███████████████████▊               | 1131/2000 [3:23:50<1:22:53,  5.72s/it]

Episode 1131/2000, real env return = 5875.08


 57%|███████████████████▉               | 1141/2000 [3:24:48<1:23:11,  5.81s/it]

Episode 1141/2000, real env return = 6297.45


 58%|████████████████████▏              | 1151/2000 [3:25:47<1:24:28,  5.97s/it]

Episode 1151/2000, real env return = 6222.24


 58%|████████████████████▎              | 1161/2000 [3:26:45<1:20:03,  5.73s/it]

Episode 1161/2000, real env return = 6477.15


 59%|████████████████████▍              | 1171/2000 [3:27:42<1:19:15,  5.74s/it]

Episode 1171/2000, real env return = 5283.65


 59%|████████████████████▋              | 1181/2000 [3:28:40<1:19:20,  5.81s/it]

Episode 1181/2000, real env return = 6023.19


 60%|████████████████████▊              | 1191/2000 [3:29:38<1:18:23,  5.81s/it]

Episode 1191/2000, real env return = 5373.61


 60%|█████████████████████              | 1201/2000 [3:30:35<1:16:52,  5.77s/it]

Episode 1201/2000, real env return = 5991.10


 61%|█████████████████████▏             | 1211/2000 [3:31:33<1:16:36,  5.83s/it]

Episode 1211/2000, real env return = 6525.03


 61%|█████████████████████▎             | 1221/2000 [3:32:31<1:15:38,  5.83s/it]

Episode 1221/2000, real env return = 6251.02


 62%|█████████████████████▌             | 1231/2000 [3:33:31<1:16:43,  5.99s/it]

Episode 1231/2000, real env return = 6011.89


 62%|█████████████████████▋             | 1241/2000 [3:34:29<1:13:20,  5.80s/it]

Episode 1241/2000, real env return = 6267.59


 63%|█████████████████████▉             | 1251/2000 [3:35:27<1:12:15,  5.79s/it]

Episode 1251/2000, real env return = 6130.93


 63%|██████████████████████             | 1261/2000 [3:36:24<1:11:12,  5.78s/it]

Episode 1261/2000, real env return = 6784.03


 64%|██████████████████████▏            | 1271/2000 [3:37:22<1:10:02,  5.76s/it]

Episode 1271/2000, real env return = 6523.49


 64%|██████████████████████▍            | 1281/2000 [3:38:20<1:08:45,  5.74s/it]

Episode 1281/2000, real env return = 6787.47


 65%|██████████████████████▌            | 1291/2000 [3:39:18<1:08:34,  5.80s/it]

Episode 1291/2000, real env return = 6919.58


 65%|██████████████████████▊            | 1301/2000 [3:40:16<1:07:19,  5.78s/it]

Episode 1301/2000, real env return = 6797.05


 66%|██████████████████████▉            | 1311/2000 [3:41:13<1:06:04,  5.75s/it]

Episode 1311/2000, real env return = 5929.18


 66%|███████████████████████            | 1321/2000 [3:42:11<1:05:13,  5.76s/it]

Episode 1321/2000, real env return = 6679.77


 67%|███████████████████████▎           | 1331/2000 [3:43:09<1:04:11,  5.76s/it]

Episode 1331/2000, real env return = 6825.09


 67%|███████████████████████▍           | 1341/2000 [3:44:06<1:03:22,  5.77s/it]

Episode 1341/2000, real env return = 6628.17


 68%|███████████████████████▋           | 1351/2000 [3:45:04<1:02:31,  5.78s/it]

Episode 1351/2000, real env return = 6897.32


 68%|███████████████████████▊           | 1361/2000 [3:46:02<1:01:36,  5.79s/it]

Episode 1361/2000, real env return = 7123.84


 69%|███████████████████████▉           | 1371/2000 [3:47:00<1:00:39,  5.79s/it]

Episode 1371/2000, real env return = 6742.97


 69%|█████████████████████████▌           | 1381/2000 [3:47:58<59:34,  5.77s/it]

Episode 1381/2000, real env return = 7125.84


 70%|█████████████████████████▋           | 1391/2000 [3:48:55<58:14,  5.74s/it]

Episode 1391/2000, real env return = 7032.79


 70%|█████████████████████████▉           | 1401/2000 [3:49:54<58:42,  5.88s/it]

Episode 1401/2000, real env return = 7542.23


 71%|██████████████████████████           | 1411/2000 [3:50:51<56:44,  5.78s/it]

Episode 1411/2000, real env return = 7395.75


 71%|██████████████████████████▎          | 1421/2000 [3:51:49<56:05,  5.81s/it]

Episode 1421/2000, real env return = 7416.36


 72%|██████████████████████████▍          | 1431/2000 [3:52:47<55:03,  5.81s/it]

Episode 1431/2000, real env return = 6926.51


 72%|██████████████████████████▋          | 1441/2000 [3:53:46<54:30,  5.85s/it]

Episode 1441/2000, real env return = 7827.76


 73%|██████████████████████████▊          | 1451/2000 [3:54:43<52:10,  5.70s/it]

Episode 1451/2000, real env return = 7611.53


 73%|███████████████████████████          | 1461/2000 [3:55:41<51:50,  5.77s/it]

Episode 1461/2000, real env return = 7227.58


 74%|███████████████████████████▏         | 1471/2000 [3:56:40<51:20,  5.82s/it]

Episode 1471/2000, real env return = 7952.07


 74%|███████████████████████████▍         | 1481/2000 [3:57:38<50:09,  5.80s/it]

Episode 1481/2000, real env return = 7555.05


 75%|███████████████████████████▌         | 1491/2000 [3:58:36<49:07,  5.79s/it]

Episode 1491/2000, real env return = 7684.75


 75%|███████████████████████████▊         | 1501/2000 [3:59:34<48:22,  5.82s/it]

Episode 1501/2000, real env return = 7858.46


 76%|███████████████████████████▉         | 1511/2000 [4:00:32<46:51,  5.75s/it]

Episode 1511/2000, real env return = 8410.61


 76%|████████████████████████████▏        | 1521/2000 [4:01:30<46:27,  5.82s/it]

Episode 1521/2000, real env return = 8145.38


 77%|████████████████████████████▎        | 1531/2000 [4:02:27<45:03,  5.76s/it]

Episode 1531/2000, real env return = 7867.83


 77%|████████████████████████████▌        | 1541/2000 [4:03:25<44:09,  5.77s/it]

Episode 1541/2000, real env return = 8006.93


 78%|████████████████████████████▋        | 1551/2000 [4:04:24<43:36,  5.83s/it]

Episode 1551/2000, real env return = 8453.77


 78%|████████████████████████████▉        | 1561/2000 [4:05:22<42:11,  5.77s/it]

Episode 1561/2000, real env return = 8646.50


 79%|█████████████████████████████        | 1571/2000 [4:06:20<41:14,  5.77s/it]

Episode 1571/2000, real env return = 7006.00


 79%|█████████████████████████████▏       | 1581/2000 [4:07:18<40:23,  5.78s/it]

Episode 1581/2000, real env return = 7961.89


 80%|█████████████████████████████▍       | 1591/2000 [4:08:15<39:01,  5.73s/it]

Episode 1591/2000, real env return = 7735.13


 80%|█████████████████████████████▌       | 1601/2000 [4:09:12<37:58,  5.71s/it]

Episode 1601/2000, real env return = 8171.42


 81%|█████████████████████████████▊       | 1611/2000 [4:10:10<37:18,  5.75s/it]

Episode 1611/2000, real env return = 8102.12


 81%|█████████████████████████████▉       | 1621/2000 [4:11:08<36:30,  5.78s/it]

Episode 1621/2000, real env return = 8615.73


 82%|██████████████████████████████▏      | 1631/2000 [4:12:06<35:30,  5.77s/it]

Episode 1631/2000, real env return = 8193.30


 82%|██████████████████████████████▎      | 1641/2000 [4:13:04<34:45,  5.81s/it]

Episode 1641/2000, real env return = 8271.46


 83%|██████████████████████████████▌      | 1651/2000 [4:14:02<33:23,  5.74s/it]

Episode 1651/2000, real env return = 8807.59


 83%|██████████████████████████████▋      | 1661/2000 [4:15:00<32:25,  5.74s/it]

Episode 1661/2000, real env return = 8865.70


 84%|██████████████████████████████▉      | 1671/2000 [4:15:58<31:32,  5.75s/it]

Episode 1671/2000, real env return = 7882.00


 84%|███████████████████████████████      | 1681/2000 [4:16:56<30:42,  5.78s/it]

Episode 1681/2000, real env return = 8609.51


 85%|███████████████████████████████▎     | 1691/2000 [4:17:54<30:27,  5.91s/it]

Episode 1691/2000, real env return = 8685.93


 85%|███████████████████████████████▍     | 1701/2000 [4:18:53<29:18,  5.88s/it]

Episode 1701/2000, real env return = 8619.66


 86%|███████████████████████████████▋     | 1711/2000 [4:19:52<27:54,  5.79s/it]

Episode 1711/2000, real env return = 8560.93


 86%|███████████████████████████████▊     | 1721/2000 [4:20:50<27:29,  5.91s/it]

Episode 1721/2000, real env return = 8663.01


 87%|████████████████████████████████     | 1731/2000 [4:21:49<26:28,  5.90s/it]

Episode 1731/2000, real env return = 8660.11


 87%|████████████████████████████████▏    | 1741/2000 [4:22:47<25:08,  5.83s/it]

Episode 1741/2000, real env return = 8742.92


 88%|████████████████████████████████▍    | 1751/2000 [4:23:46<24:10,  5.83s/it]

Episode 1751/2000, real env return = 8435.85


 88%|████████████████████████████████▌    | 1761/2000 [4:24:44<22:58,  5.77s/it]

Episode 1761/2000, real env return = 8625.79


 89%|████████████████████████████████▊    | 1771/2000 [4:25:42<22:06,  5.79s/it]

Episode 1771/2000, real env return = 8419.04


 89%|████████████████████████████████▉    | 1781/2000 [4:26:40<20:58,  5.75s/it]

Episode 1781/2000, real env return = 9251.10


 90%|█████████████████████████████████▏   | 1791/2000 [4:27:37<19:57,  5.73s/it]

Episode 1791/2000, real env return = 8525.10


 90%|█████████████████████████████████▎   | 1801/2000 [4:28:34<19:12,  5.79s/it]

Episode 1801/2000, real env return = 8613.81


 91%|█████████████████████████████████▌   | 1811/2000 [4:29:32<18:11,  5.78s/it]

Episode 1811/2000, real env return = 9192.48


 91%|█████████████████████████████████▋   | 1821/2000 [4:30:30<17:26,  5.84s/it]

Episode 1821/2000, real env return = 7867.79


 92%|█████████████████████████████████▊   | 1831/2000 [4:31:27<16:08,  5.73s/it]

Episode 1831/2000, real env return = 8193.77


 92%|██████████████████████████████████   | 1841/2000 [4:32:26<15:26,  5.82s/it]

Episode 1841/2000, real env return = 9103.60


 93%|██████████████████████████████████▏  | 1851/2000 [4:33:24<14:17,  5.76s/it]

Episode 1851/2000, real env return = 8627.88


 93%|██████████████████████████████████▍  | 1861/2000 [4:34:22<13:28,  5.82s/it]

Episode 1861/2000, real env return = 8485.33


 94%|██████████████████████████████████▌  | 1871/2000 [4:35:20<12:27,  5.79s/it]

Episode 1871/2000, real env return = 9443.72


 94%|██████████████████████████████████▊  | 1881/2000 [4:36:18<11:24,  5.75s/it]

Episode 1881/2000, real env return = 8438.63


 95%|██████████████████████████████████▉  | 1891/2000 [4:37:16<10:30,  5.79s/it]

Episode 1891/2000, real env return = 8968.71


 95%|███████████████████████████████████▏ | 1901/2000 [4:38:14<09:30,  5.76s/it]

Episode 1901/2000, real env return = 7969.70


 96%|███████████████████████████████████▎ | 1911/2000 [4:39:11<08:40,  5.84s/it]

Episode 1911/2000, real env return = 8847.63


 96%|███████████████████████████████████▌ | 1921/2000 [4:40:09<07:34,  5.75s/it]

Episode 1921/2000, real env return = 8899.19


 97%|███████████████████████████████████▋ | 1931/2000 [4:41:07<06:36,  5.75s/it]

Episode 1931/2000, real env return = 9768.73


 97%|███████████████████████████████████▉ | 1941/2000 [4:42:04<05:39,  5.76s/it]

Episode 1941/2000, real env return = 8975.04


 98%|████████████████████████████████████ | 1951/2000 [4:43:03<04:49,  5.91s/it]

Episode 1951/2000, real env return = 9200.37


 98%|████████████████████████████████████▎| 1961/2000 [4:44:01<03:47,  5.83s/it]

Episode 1961/2000, real env return = 7797.69


 99%|████████████████████████████████████▍| 1971/2000 [4:44:59<02:48,  5.82s/it]

Episode 1971/2000, real env return = 9429.54


 99%|████████████████████████████████████▋| 1981/2000 [4:45:57<01:50,  5.81s/it]

Episode 1981/2000, real env return = 9276.13


100%|████████████████████████████████████▊| 1991/2000 [4:46:55<00:52,  5.86s/it]

Episode 1991/2000, real env return = 8791.17


100%|█████████████████████████████████████| 2000/2000 [4:47:47<00:00,  8.63s/it]


Training finished.


In [3]:
train2 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<10:42,  3.11it/s]

Episode 1/2000, real env return = -231.54


  1%|▏                                      | 11/2000 [00:44<4:59:11,  9.03s/it]

Episode 11/2000, real env return = -329.42


  1%|▍                                      | 21/2000 [02:52<6:58:57, 12.70s/it]

Episode 21/2000, real env return = -288.07


  2%|▌                                      | 31/2000 [05:01<7:03:40, 12.91s/it]

Episode 31/2000, real env return = -384.91


  2%|▊                                      | 41/2000 [07:10<7:02:18, 12.93s/it]

Episode 41/2000, real env return = -492.92


  3%|▉                                      | 51/2000 [09:19<6:57:46, 12.86s/it]

Episode 51/2000, real env return = -106.59


  3%|█▏                                     | 61/2000 [11:28<6:56:25, 12.89s/it]

Episode 61/2000, real env return = -145.47


  4%|█▍                                     | 71/2000 [13:37<6:54:53, 12.90s/it]

Episode 71/2000, real env return = -241.36


  4%|█▌                                     | 81/2000 [15:47<6:56:40, 13.03s/it]

Episode 81/2000, real env return = -252.63


  5%|█▊                                     | 91/2000 [17:57<6:54:33, 13.03s/it]

Episode 91/2000, real env return = -137.92


  5%|█▉                                    | 101/2000 [20:07<6:50:51, 12.98s/it]

Episode 101/2000, real env return = -334.07


  6%|██                                    | 111/2000 [22:19<6:54:57, 13.18s/it]

Episode 111/2000, real env return = -352.28


  6%|██▎                                   | 121/2000 [24:30<6:52:40, 13.18s/it]

Episode 121/2000, real env return = -368.49


  7%|██▍                                   | 131/2000 [26:41<6:47:54, 13.09s/it]

Episode 131/2000, real env return = -328.52


  7%|██▋                                   | 141/2000 [28:52<6:46:24, 13.12s/it]

Episode 141/2000, real env return = -195.55


  8%|██▊                                   | 151/2000 [31:03<6:45:42, 13.17s/it]

Episode 151/2000, real env return = -141.58


  8%|███                                   | 161/2000 [33:14<6:39:07, 13.02s/it]

Episode 161/2000, real env return = -320.23


  9%|███▏                                  | 171/2000 [35:25<6:40:11, 13.13s/it]

Episode 171/2000, real env return = 146.36


  9%|███▍                                  | 181/2000 [37:37<6:39:20, 13.17s/it]

Episode 181/2000, real env return = 446.30


 10%|███▋                                  | 191/2000 [39:48<6:33:59, 13.07s/it]

Episode 191/2000, real env return = 1135.02


 10%|███▊                                  | 201/2000 [41:58<6:30:18, 13.02s/it]

Episode 201/2000, real env return = 914.08


 11%|████                                  | 211/2000 [44:09<6:30:25, 13.09s/it]

Episode 211/2000, real env return = 803.58


 11%|████▏                                 | 221/2000 [46:21<6:30:40, 13.18s/it]

Episode 221/2000, real env return = 793.23


 12%|████▍                                 | 231/2000 [48:33<6:28:29, 13.18s/it]

Episode 231/2000, real env return = 1084.40


 12%|████▌                                 | 241/2000 [50:45<6:26:25, 13.18s/it]

Episode 241/2000, real env return = 1164.83


 13%|████▊                                 | 251/2000 [52:56<6:21:16, 13.08s/it]

Episode 251/2000, real env return = 1148.28


 13%|████▉                                 | 261/2000 [55:08<6:19:45, 13.10s/it]

Episode 261/2000, real env return = 1211.36


 14%|█████▏                                | 271/2000 [57:19<6:18:13, 13.13s/it]

Episode 271/2000, real env return = 1133.59


 14%|█████▎                                | 281/2000 [59:31<6:18:49, 13.22s/it]

Episode 281/2000, real env return = 1166.77


 15%|█████▏                              | 291/2000 [1:01:43<6:17:33, 13.26s/it]

Episode 291/2000, real env return = 1211.16


 15%|█████▍                              | 301/2000 [1:03:55<6:13:29, 13.19s/it]

Episode 301/2000, real env return = 1263.35


 16%|█████▌                              | 311/2000 [1:06:08<6:13:01, 13.25s/it]

Episode 311/2000, real env return = 1303.39


 16%|█████▊                              | 321/2000 [1:08:20<6:08:36, 13.17s/it]

Episode 321/2000, real env return = 1312.28


 17%|█████▉                              | 331/2000 [1:10:33<6:07:40, 13.22s/it]

Episode 331/2000, real env return = 1249.57


 17%|██████▏                             | 341/2000 [1:12:44<6:04:32, 13.18s/it]

Episode 341/2000, real env return = 1354.06


 18%|██████▎                             | 351/2000 [1:14:57<6:05:00, 13.28s/it]

Episode 351/2000, real env return = 1347.33


 18%|██████▍                             | 361/2000 [1:17:10<6:04:20, 13.34s/it]

Episode 361/2000, real env return = 1269.92


 19%|██████▋                             | 371/2000 [1:19:21<5:54:36, 13.06s/it]

Episode 371/2000, real env return = 1312.31


 19%|██████▊                             | 381/2000 [1:21:32<5:54:13, 13.13s/it]

Episode 381/2000, real env return = 1251.67


 20%|███████                             | 391/2000 [1:23:43<5:51:26, 13.11s/it]

Episode 391/2000, real env return = 1280.87


 20%|███████▏                            | 401/2000 [1:25:54<5:49:35, 13.12s/it]

Episode 401/2000, real env return = 1259.39


 21%|███████▍                            | 411/2000 [1:28:05<5:47:14, 13.11s/it]

Episode 411/2000, real env return = 1247.66


 21%|███████▌                            | 421/2000 [1:30:17<5:47:07, 13.19s/it]

Episode 421/2000, real env return = 1251.72


 22%|███████▊                            | 431/2000 [1:32:28<5:44:08, 13.16s/it]

Episode 431/2000, real env return = 1221.76


 22%|███████▉                            | 441/2000 [1:34:40<5:41:26, 13.14s/it]

Episode 441/2000, real env return = 1309.47


 23%|████████                            | 451/2000 [1:36:51<5:39:22, 13.15s/it]

Episode 451/2000, real env return = 1184.42


 23%|████████▎                           | 461/2000 [1:39:02<5:35:46, 13.09s/it]

Episode 461/2000, real env return = 1266.94


 24%|████████▍                           | 471/2000 [1:41:13<5:36:06, 13.19s/it]

Episode 471/2000, real env return = 1250.30


 24%|████████▋                           | 481/2000 [1:43:25<5:32:23, 13.13s/it]

Episode 481/2000, real env return = 1271.55


 25%|████████▊                           | 491/2000 [1:45:36<5:29:05, 13.09s/it]

Episode 491/2000, real env return = 1164.41


 25%|█████████                           | 501/2000 [1:47:48<5:29:55, 13.21s/it]

Episode 501/2000, real env return = 1315.79


 26%|█████████▏                          | 511/2000 [1:50:00<5:25:57, 13.13s/it]

Episode 511/2000, real env return = 1334.63


 26%|█████████▍                          | 521/2000 [1:52:11<5:24:45, 13.17s/it]

Episode 521/2000, real env return = 1375.71


 27%|█████████▌                          | 531/2000 [1:54:23<5:24:02, 13.24s/it]

Episode 531/2000, real env return = 1366.94


 27%|█████████▋                          | 541/2000 [1:56:35<5:21:14, 13.21s/it]

Episode 541/2000, real env return = 1303.31


 28%|█████████▉                          | 551/2000 [1:58:47<5:18:41, 13.20s/it]

Episode 551/2000, real env return = 1337.76


 28%|██████████                          | 561/2000 [2:01:00<5:19:17, 13.31s/it]

Episode 561/2000, real env return = 1322.88


 29%|██████████▎                         | 571/2000 [2:03:12<5:13:04, 13.15s/it]

Episode 571/2000, real env return = 1344.41


 29%|██████████▍                         | 581/2000 [2:05:24<5:11:08, 13.16s/it]

Episode 581/2000, real env return = 1303.98


 30%|██████████▋                         | 591/2000 [2:07:36<5:10:49, 13.24s/it]

Episode 591/2000, real env return = 1331.44


 30%|██████████▊                         | 601/2000 [2:09:48<5:06:49, 13.16s/it]

Episode 601/2000, real env return = 1366.58


 31%|██████████▉                         | 611/2000 [2:12:00<5:07:29, 13.28s/it]

Episode 611/2000, real env return = 1388.98


 31%|███████████▏                        | 621/2000 [2:14:13<5:03:17, 13.20s/it]

Episode 621/2000, real env return = 1269.92


 32%|███████████▎                        | 631/2000 [2:16:26<5:03:00, 13.28s/it]

Episode 631/2000, real env return = 1355.03


 32%|███████████▌                        | 641/2000 [2:18:38<5:00:57, 13.29s/it]

Episode 641/2000, real env return = 1330.94


 33%|███████████▋                        | 651/2000 [2:20:51<4:58:45, 13.29s/it]

Episode 651/2000, real env return = 1286.49


 33%|███████████▉                        | 661/2000 [2:23:04<4:57:11, 13.32s/it]

Episode 661/2000, real env return = 1417.06


 34%|████████████                        | 671/2000 [2:25:16<4:51:14, 13.15s/it]

Episode 671/2000, real env return = 1391.58


 34%|████████████▎                       | 681/2000 [2:27:28<4:48:44, 13.13s/it]

Episode 681/2000, real env return = 1400.08


 35%|████████████▍                       | 691/2000 [2:29:40<4:47:30, 13.18s/it]

Episode 691/2000, real env return = 1383.35


 35%|████████████▌                       | 701/2000 [2:31:52<4:45:46, 13.20s/it]

Episode 701/2000, real env return = 1383.73


 36%|████████████▊                       | 711/2000 [2:34:03<4:42:34, 13.15s/it]

Episode 711/2000, real env return = 1397.17


 36%|████████████▉                       | 721/2000 [2:36:15<4:40:16, 13.15s/it]

Episode 721/2000, real env return = 1402.18


 37%|█████████████▏                      | 731/2000 [2:38:26<4:37:39, 13.13s/it]

Episode 731/2000, real env return = 1381.06


 37%|█████████████▎                      | 741/2000 [2:40:37<4:34:55, 13.10s/it]

Episode 741/2000, real env return = 1438.55


 38%|█████████████▌                      | 751/2000 [2:42:49<4:34:09, 13.17s/it]

Episode 751/2000, real env return = 1476.86


 38%|█████████████▋                      | 761/2000 [2:45:01<4:31:43, 13.16s/it]

Episode 761/2000, real env return = 1500.51


 39%|█████████████▉                      | 771/2000 [2:47:12<4:27:48, 13.07s/it]

Episode 771/2000, real env return = 1465.95


 39%|██████████████                      | 781/2000 [2:49:23<4:25:59, 13.09s/it]

Episode 781/2000, real env return = 1421.56


 40%|██████████████▏                     | 791/2000 [2:51:35<4:25:50, 13.19s/it]

Episode 791/2000, real env return = 1488.63


 40%|██████████████▍                     | 801/2000 [2:53:27<2:54:24,  8.73s/it]

Episode 801/2000, real env return = 1447.53


 41%|██████████████▌                     | 811/2000 [2:54:25<1:56:10,  5.86s/it]

Episode 811/2000, real env return = 1453.30


 41%|██████████████▊                     | 821/2000 [2:55:23<1:53:40,  5.78s/it]

Episode 821/2000, real env return = 1436.90


 42%|██████████████▉                     | 831/2000 [2:56:20<1:51:47,  5.74s/it]

Episode 831/2000, real env return = 1438.55


 42%|███████████████▏                    | 841/2000 [2:57:18<1:49:56,  5.69s/it]

Episode 841/2000, real env return = 1267.73


 43%|███████████████▎                    | 851/2000 [2:58:15<1:51:05,  5.80s/it]

Episode 851/2000, real env return = 1375.86


 43%|███████████████▍                    | 861/2000 [2:59:13<1:49:13,  5.75s/it]

Episode 861/2000, real env return = 1346.96


 44%|███████████████▋                    | 871/2000 [3:00:10<1:47:45,  5.73s/it]

Episode 871/2000, real env return = 1316.37


 44%|███████████████▊                    | 881/2000 [3:01:08<1:47:13,  5.75s/it]

Episode 881/2000, real env return = 1397.16


 45%|████████████████                    | 891/2000 [3:02:05<1:46:46,  5.78s/it]

Episode 891/2000, real env return = 1363.25


 45%|████████████████▏                   | 901/2000 [3:03:03<1:45:01,  5.73s/it]

Episode 901/2000, real env return = 1474.98


 46%|████████████████▍                   | 911/2000 [3:04:02<1:46:32,  5.87s/it]

Episode 911/2000, real env return = 1316.50


 46%|████████████████▌                   | 921/2000 [3:05:00<1:44:59,  5.84s/it]

Episode 921/2000, real env return = 1408.34


 47%|████████████████▊                   | 931/2000 [3:05:57<1:42:06,  5.73s/it]

Episode 931/2000, real env return = 1403.90


 47%|████████████████▉                   | 941/2000 [3:06:55<1:41:43,  5.76s/it]

Episode 941/2000, real env return = 1419.60


 48%|█████████████████                   | 951/2000 [3:07:52<1:39:44,  5.70s/it]

Episode 951/2000, real env return = 1453.92


 48%|█████████████████▎                  | 961/2000 [3:08:50<1:40:52,  5.82s/it]

Episode 961/2000, real env return = 1402.53


 49%|█████████████████▍                  | 971/2000 [3:09:48<1:39:20,  5.79s/it]

Episode 971/2000, real env return = 1408.28


 49%|█████████████████▋                  | 981/2000 [3:10:47<1:40:07,  5.90s/it]

Episode 981/2000, real env return = 1520.06


 50%|█████████████████▊                  | 991/2000 [3:11:45<1:36:58,  5.77s/it]

Episode 991/2000, real env return = 1428.30


 50%|█████████████████▌                 | 1001/2000 [3:12:43<1:35:42,  5.75s/it]

Episode 1001/2000, real env return = 1491.43


 51%|█████████████████▋                 | 1011/2000 [3:13:40<1:34:23,  5.73s/it]

Episode 1011/2000, real env return = 1455.64


 51%|█████████████████▊                 | 1021/2000 [3:14:38<1:35:06,  5.83s/it]

Episode 1021/2000, real env return = 1543.75


 52%|██████████████████                 | 1031/2000 [3:15:36<1:32:57,  5.76s/it]

Episode 1031/2000, real env return = 1489.61


 52%|██████████████████▏                | 1041/2000 [3:16:33<1:32:37,  5.80s/it]

Episode 1041/2000, real env return = 1448.44


 53%|██████████████████▍                | 1051/2000 [3:17:30<1:30:55,  5.75s/it]

Episode 1051/2000, real env return = 1505.90


 53%|██████████████████▌                | 1061/2000 [3:18:28<1:30:23,  5.78s/it]

Episode 1061/2000, real env return = 1499.58


 54%|██████████████████▋                | 1071/2000 [3:19:26<1:30:33,  5.85s/it]

Episode 1071/2000, real env return = 1586.81


 54%|██████████████████▉                | 1081/2000 [3:20:25<1:30:14,  5.89s/it]

Episode 1081/2000, real env return = 1548.06


 55%|███████████████████                | 1091/2000 [3:21:24<1:28:02,  5.81s/it]

Episode 1091/2000, real env return = 1594.09


 55%|███████████████████▎               | 1101/2000 [3:22:22<1:27:34,  5.84s/it]

Episode 1101/2000, real env return = 1624.83


 56%|███████████████████▍               | 1111/2000 [3:23:20<1:25:53,  5.80s/it]

Episode 1111/2000, real env return = 1626.03


 56%|███████████████████▌               | 1121/2000 [3:24:18<1:25:14,  5.82s/it]

Episode 1121/2000, real env return = 1645.55


 57%|███████████████████▊               | 1131/2000 [3:25:15<1:23:00,  5.73s/it]

Episode 1131/2000, real env return = 1690.36


 57%|███████████████████▉               | 1141/2000 [3:26:13<1:23:07,  5.81s/it]

Episode 1141/2000, real env return = 1642.22


 58%|████████████████████▏              | 1151/2000 [3:27:12<1:21:41,  5.77s/it]

Episode 1151/2000, real env return = 1618.70


 58%|████████████████████▎              | 1161/2000 [3:28:10<1:20:57,  5.79s/it]

Episode 1161/2000, real env return = 1641.90


 59%|████████████████████▍              | 1171/2000 [3:29:08<1:20:05,  5.80s/it]

Episode 1171/2000, real env return = 1639.03


 59%|████████████████████▋              | 1181/2000 [3:30:06<1:18:47,  5.77s/it]

Episode 1181/2000, real env return = 1652.89


 60%|████████████████████▊              | 1191/2000 [3:31:04<1:18:31,  5.82s/it]

Episode 1191/2000, real env return = 1684.33


 60%|█████████████████████              | 1201/2000 [3:32:02<1:16:47,  5.77s/it]

Episode 1201/2000, real env return = 1721.26


 61%|█████████████████████▏             | 1211/2000 [3:33:00<1:15:40,  5.75s/it]

Episode 1211/2000, real env return = 1700.77


 61%|█████████████████████▎             | 1221/2000 [3:33:57<1:15:02,  5.78s/it]

Episode 1221/2000, real env return = 1700.56


 62%|█████████████████████▌             | 1231/2000 [3:34:55<1:14:04,  5.78s/it]

Episode 1231/2000, real env return = 1802.81


 62%|█████████████████████▋             | 1241/2000 [3:35:53<1:12:57,  5.77s/it]

Episode 1241/2000, real env return = 1813.25


 63%|█████████████████████▉             | 1251/2000 [3:36:51<1:12:03,  5.77s/it]

Episode 1251/2000, real env return = 1775.57


 63%|██████████████████████             | 1261/2000 [3:37:49<1:12:27,  5.88s/it]

Episode 1261/2000, real env return = 1769.07


 64%|██████████████████████▏            | 1271/2000 [3:38:48<1:10:45,  5.82s/it]

Episode 1271/2000, real env return = 1843.88


 64%|██████████████████████▍            | 1281/2000 [3:39:46<1:09:15,  5.78s/it]

Episode 1281/2000, real env return = 1780.66


 65%|██████████████████████▌            | 1291/2000 [3:40:43<1:08:04,  5.76s/it]

Episode 1291/2000, real env return = 1912.81


 65%|██████████████████████▊            | 1301/2000 [3:41:41<1:08:11,  5.85s/it]

Episode 1301/2000, real env return = 1797.81


 66%|██████████████████████▉            | 1311/2000 [3:42:40<1:06:45,  5.81s/it]

Episode 1311/2000, real env return = 1970.47


 66%|███████████████████████            | 1321/2000 [3:43:38<1:06:04,  5.84s/it]

Episode 1321/2000, real env return = 1892.98


 67%|███████████████████████▎           | 1331/2000 [3:44:36<1:04:20,  5.77s/it]

Episode 1331/2000, real env return = 2016.93


 67%|███████████████████████▍           | 1341/2000 [3:45:35<1:04:52,  5.91s/it]

Episode 1341/2000, real env return = 1968.23


 68%|███████████████████████▋           | 1351/2000 [3:46:32<1:02:42,  5.80s/it]

Episode 1351/2000, real env return = 1989.34


 68%|███████████████████████▊           | 1361/2000 [3:47:30<1:01:49,  5.80s/it]

Episode 1361/2000, real env return = 2020.69


 69%|███████████████████████▉           | 1371/2000 [3:48:28<1:00:06,  5.73s/it]

Episode 1371/2000, real env return = 1872.02


 69%|████████████████████████▏          | 1381/2000 [3:49:26<1:00:18,  5.85s/it]

Episode 1381/2000, real env return = 2007.75


 70%|█████████████████████████▋           | 1391/2000 [3:50:24<58:18,  5.74s/it]

Episode 1391/2000, real env return = 1958.49


 70%|█████████████████████████▉           | 1401/2000 [3:51:22<57:44,  5.78s/it]

Episode 1401/2000, real env return = 1939.04


 71%|██████████████████████████           | 1411/2000 [3:52:20<56:03,  5.71s/it]

Episode 1411/2000, real env return = 1968.48


 71%|██████████████████████████▎          | 1421/2000 [3:53:17<56:01,  5.81s/it]

Episode 1421/2000, real env return = 1897.23


 72%|██████████████████████████▍          | 1431/2000 [3:54:15<54:04,  5.70s/it]

Episode 1431/2000, real env return = 1937.75


 72%|██████████████████████████▋          | 1441/2000 [3:55:13<53:55,  5.79s/it]

Episode 1441/2000, real env return = 2019.33


 73%|██████████████████████████▊          | 1451/2000 [3:56:11<53:56,  5.89s/it]

Episode 1451/2000, real env return = 1955.05


 73%|███████████████████████████          | 1461/2000 [3:57:10<52:26,  5.84s/it]

Episode 1461/2000, real env return = 2052.86


 74%|███████████████████████████▏         | 1471/2000 [3:58:07<50:37,  5.74s/it]

Episode 1471/2000, real env return = 1970.30


 74%|███████████████████████████▍         | 1481/2000 [3:59:06<50:24,  5.83s/it]

Episode 1481/2000, real env return = 2100.20


 75%|███████████████████████████▌         | 1491/2000 [4:00:03<49:08,  5.79s/it]

Episode 1491/2000, real env return = 2035.57


 75%|███████████████████████████▊         | 1501/2000 [4:01:01<47:44,  5.74s/it]

Episode 1501/2000, real env return = 2218.47


 76%|███████████████████████████▉         | 1511/2000 [4:01:59<47:26,  5.82s/it]

Episode 1511/2000, real env return = 2193.38


 76%|████████████████████████████▏        | 1521/2000 [4:02:57<45:35,  5.71s/it]

Episode 1521/2000, real env return = 2098.99


 77%|████████████████████████████▎        | 1531/2000 [4:03:54<45:07,  5.77s/it]

Episode 1531/2000, real env return = 2138.49


 77%|████████████████████████████▌        | 1541/2000 [4:04:52<44:10,  5.77s/it]

Episode 1541/2000, real env return = 2078.73


 78%|████████████████████████████▋        | 1551/2000 [4:05:50<43:10,  5.77s/it]

Episode 1551/2000, real env return = 2176.32


 78%|████████████████████████████▉        | 1561/2000 [4:06:48<41:59,  5.74s/it]

Episode 1561/2000, real env return = 2261.25


 79%|█████████████████████████████        | 1571/2000 [4:07:46<41:20,  5.78s/it]

Episode 1571/2000, real env return = 2168.21


 79%|█████████████████████████████▏       | 1581/2000 [4:08:43<40:06,  5.74s/it]

Episode 1581/2000, real env return = 2275.97


 80%|█████████████████████████████▍       | 1591/2000 [4:09:41<39:10,  5.75s/it]

Episode 1591/2000, real env return = 2205.98


 80%|█████████████████████████████▌       | 1601/2000 [4:10:39<38:30,  5.79s/it]

Episode 1601/2000, real env return = 2158.14


 81%|█████████████████████████████▊       | 1611/2000 [4:11:37<37:08,  5.73s/it]

Episode 1611/2000, real env return = 2218.79


 81%|█████████████████████████████▉       | 1621/2000 [4:12:34<36:32,  5.78s/it]

Episode 1621/2000, real env return = 2240.32


 82%|██████████████████████████████▏      | 1631/2000 [4:13:32<35:10,  5.72s/it]

Episode 1631/2000, real env return = 2340.32


 82%|██████████████████████████████▎      | 1641/2000 [4:14:29<34:31,  5.77s/it]

Episode 1641/2000, real env return = 2292.58


 83%|██████████████████████████████▌      | 1651/2000 [4:15:27<33:24,  5.74s/it]

Episode 1651/2000, real env return = 2352.57


 83%|██████████████████████████████▋      | 1661/2000 [4:16:24<32:23,  5.73s/it]

Episode 1661/2000, real env return = 2293.61


 84%|██████████████████████████████▉      | 1671/2000 [4:17:22<31:42,  5.78s/it]

Episode 1671/2000, real env return = 2423.30


 84%|███████████████████████████████      | 1681/2000 [4:18:20<30:30,  5.74s/it]

Episode 1681/2000, real env return = 2396.31


 85%|███████████████████████████████▎     | 1691/2000 [4:19:18<29:55,  5.81s/it]

Episode 1691/2000, real env return = 2359.99


 85%|███████████████████████████████▍     | 1701/2000 [4:20:15<28:37,  5.75s/it]

Episode 1701/2000, real env return = 2509.98


 86%|███████████████████████████████▋     | 1711/2000 [4:21:14<27:58,  5.81s/it]

Episode 1711/2000, real env return = 2491.47


 86%|███████████████████████████████▊     | 1721/2000 [4:22:12<26:42,  5.74s/it]

Episode 1721/2000, real env return = 2592.44


 87%|████████████████████████████████     | 1731/2000 [4:23:10<26:25,  5.89s/it]

Episode 1731/2000, real env return = 2575.90


 87%|████████████████████████████████▏    | 1741/2000 [4:24:08<24:48,  5.75s/it]

Episode 1741/2000, real env return = 2703.19


 88%|████████████████████████████████▍    | 1751/2000 [4:25:06<23:58,  5.78s/it]

Episode 1751/2000, real env return = 2552.71


 88%|████████████████████████████████▌    | 1761/2000 [4:26:03<22:49,  5.73s/it]

Episode 1761/2000, real env return = 2770.64


 89%|████████████████████████████████▊    | 1771/2000 [4:27:01<21:58,  5.76s/it]

Episode 1771/2000, real env return = 2811.21


 89%|████████████████████████████████▉    | 1781/2000 [4:27:58<20:48,  5.70s/it]

Episode 1781/2000, real env return = 2734.89


 90%|█████████████████████████████████▏   | 1791/2000 [4:28:56<20:07,  5.78s/it]

Episode 1791/2000, real env return = 2776.80


 90%|█████████████████████████████████▎   | 1801/2000 [4:29:54<19:14,  5.80s/it]

Episode 1801/2000, real env return = 2792.91


 91%|█████████████████████████████████▌   | 1811/2000 [4:30:52<18:08,  5.76s/it]

Episode 1811/2000, real env return = 2934.68


 91%|█████████████████████████████████▋   | 1821/2000 [4:31:49<17:09,  5.75s/it]

Episode 1821/2000, real env return = 2972.02


 92%|█████████████████████████████████▊   | 1831/2000 [4:32:47<16:14,  5.77s/it]

Episode 1831/2000, real env return = 2941.03


 92%|██████████████████████████████████   | 1841/2000 [4:33:45<15:28,  5.84s/it]

Episode 1841/2000, real env return = 3023.64


 93%|██████████████████████████████████▏  | 1851/2000 [4:34:43<14:33,  5.86s/it]

Episode 1851/2000, real env return = 2859.17


 93%|██████████████████████████████████▍  | 1861/2000 [4:35:41<13:28,  5.82s/it]

Episode 1861/2000, real env return = 2949.37


 94%|██████████████████████████████████▌  | 1871/2000 [4:36:40<12:27,  5.80s/it]

Episode 1871/2000, real env return = 2767.90


 94%|██████████████████████████████████▊  | 1881/2000 [4:37:38<11:29,  5.79s/it]

Episode 1881/2000, real env return = 3018.66


 95%|██████████████████████████████████▉  | 1891/2000 [4:38:35<10:27,  5.76s/it]

Episode 1891/2000, real env return = 3158.38


 95%|███████████████████████████████████▏ | 1901/2000 [4:39:33<09:39,  5.85s/it]

Episode 1901/2000, real env return = 3173.05


 96%|███████████████████████████████████▎ | 1911/2000 [4:40:31<08:36,  5.80s/it]

Episode 1911/2000, real env return = 3020.34


 96%|███████████████████████████████████▌ | 1921/2000 [4:41:29<07:36,  5.77s/it]

Episode 1921/2000, real env return = 3253.18


 97%|███████████████████████████████████▋ | 1931/2000 [4:42:27<06:39,  5.79s/it]

Episode 1931/2000, real env return = 3444.50


 97%|███████████████████████████████████▉ | 1941/2000 [4:43:25<05:43,  5.81s/it]

Episode 1941/2000, real env return = 2976.98


 98%|████████████████████████████████████ | 1951/2000 [4:44:22<04:43,  5.78s/it]

Episode 1951/2000, real env return = 3090.38


 98%|████████████████████████████████████▎| 1961/2000 [4:45:21<03:45,  5.77s/it]

Episode 1961/2000, real env return = 3178.27


 99%|████████████████████████████████████▍| 1971/2000 [4:46:18<02:47,  5.77s/it]

Episode 1971/2000, real env return = 3277.75


 99%|████████████████████████████████████▋| 1981/2000 [4:47:16<01:50,  5.80s/it]

Episode 1981/2000, real env return = 3414.42


100%|████████████████████████████████████▊| 1991/2000 [4:48:14<00:52,  5.84s/it]

Episode 1991/2000, real env return = 3194.33


100%|█████████████████████████████████████| 2000/2000 [4:49:07<00:00,  8.67s/it]


Training finished.


In [4]:
train3 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<26:21,  1.26it/s]

Episode 1/2000, real env return = -164.13


  1%|▏                                      | 11/2000 [00:56<5:36:34, 10.15s/it]

Episode 11/2000, real env return = -400.90


  1%|▍                                      | 21/2000 [03:06<7:06:18, 12.93s/it]

Episode 21/2000, real env return = -328.62


  2%|▌                                      | 31/2000 [05:14<7:04:30, 12.94s/it]

Episode 31/2000, real env return = -520.96


  2%|▊                                      | 41/2000 [07:24<7:01:19, 12.90s/it]

Episode 41/2000, real env return = -511.68


  3%|▉                                      | 51/2000 [09:32<6:59:20, 12.91s/it]

Episode 51/2000, real env return = -148.41


  3%|█▏                                     | 61/2000 [11:41<6:55:57, 12.87s/it]

Episode 61/2000, real env return = -181.25


  4%|█▍                                     | 71/2000 [13:51<6:54:23, 12.89s/it]

Episode 71/2000, real env return = -258.18


  4%|█▌                                     | 81/2000 [16:00<6:57:36, 13.06s/it]

Episode 81/2000, real env return = -309.52


  5%|█▊                                     | 91/2000 [18:11<6:53:18, 12.99s/it]

Episode 91/2000, real env return = -254.76


  5%|█▉                                    | 101/2000 [20:21<6:53:27, 13.06s/it]

Episode 101/2000, real env return = -191.65


  6%|██                                    | 111/2000 [22:31<6:50:42, 13.05s/it]

Episode 111/2000, real env return = -181.62


  6%|██▎                                   | 121/2000 [24:42<6:49:10, 13.07s/it]

Episode 121/2000, real env return = -188.79


  7%|██▍                                   | 131/2000 [26:53<6:46:34, 13.05s/it]

Episode 131/2000, real env return = -240.73


  7%|██▋                                   | 141/2000 [29:03<6:43:09, 13.01s/it]

Episode 141/2000, real env return = -189.59


  8%|██▊                                   | 151/2000 [31:13<6:42:19, 13.06s/it]

Episode 151/2000, real env return = -196.97


  8%|███                                   | 161/2000 [33:24<6:38:32, 13.00s/it]

Episode 161/2000, real env return = -181.31


  9%|███▏                                  | 171/2000 [35:34<6:37:38, 13.04s/it]

Episode 171/2000, real env return = -164.87


  9%|███▍                                  | 181/2000 [37:45<6:35:12, 13.04s/it]

Episode 181/2000, real env return = -136.75


 10%|███▋                                  | 191/2000 [39:56<6:32:31, 13.02s/it]

Episode 191/2000, real env return = -302.21


 10%|███▊                                  | 201/2000 [42:07<6:32:58, 13.11s/it]

Episode 201/2000, real env return = -164.89


 11%|████                                  | 211/2000 [44:18<6:29:42, 13.07s/it]

Episode 211/2000, real env return = -27.07


 11%|████▏                                 | 221/2000 [46:29<6:28:36, 13.11s/it]

Episode 221/2000, real env return = 20.17


 12%|████▍                                 | 231/2000 [48:40<6:26:00, 13.09s/it]

Episode 231/2000, real env return = -75.17


 12%|████▌                                 | 241/2000 [50:51<6:24:12, 13.11s/it]

Episode 241/2000, real env return = -19.28


 13%|████▊                                 | 251/2000 [53:02<6:21:56, 13.10s/it]

Episode 251/2000, real env return = 915.05


 13%|████▉                                 | 261/2000 [55:13<6:19:45, 13.10s/it]

Episode 261/2000, real env return = -1454.62


 14%|█████▏                                | 271/2000 [57:23<6:17:36, 13.10s/it]

Episode 271/2000, real env return = 781.41


 14%|█████▎                                | 281/2000 [59:34<6:16:11, 13.13s/it]

Episode 281/2000, real env return = 1559.42


 15%|█████▏                              | 291/2000 [1:01:45<6:13:20, 13.11s/it]

Episode 291/2000, real env return = 1106.95


 15%|█████▍                              | 301/2000 [1:03:57<6:10:44, 13.09s/it]

Episode 301/2000, real env return = 1093.24


 16%|█████▌                              | 311/2000 [1:06:08<6:09:14, 13.12s/it]

Episode 311/2000, real env return = 1531.27


 16%|█████▊                              | 321/2000 [1:08:19<6:05:46, 13.07s/it]

Episode 321/2000, real env return = 1442.86


 17%|█████▉                              | 331/2000 [1:10:30<6:04:05, 13.09s/it]

Episode 331/2000, real env return = 1779.03


 17%|██████▏                             | 341/2000 [1:12:41<6:03:17, 13.14s/it]

Episode 341/2000, real env return = 1270.42


 18%|██████▎                             | 351/2000 [1:14:53<6:01:13, 13.14s/it]

Episode 351/2000, real env return = 1348.98


 18%|██████▍                             | 361/2000 [1:17:04<5:57:48, 13.10s/it]

Episode 361/2000, real env return = 1707.59


 19%|██████▋                             | 371/2000 [1:19:15<5:57:16, 13.16s/it]

Episode 371/2000, real env return = 1487.16


 19%|██████▊                             | 381/2000 [1:21:27<5:53:29, 13.10s/it]

Episode 381/2000, real env return = 1490.52


 20%|███████                             | 391/2000 [1:23:38<5:50:45, 13.08s/it]

Episode 391/2000, real env return = 1583.72


 20%|███████▏                            | 401/2000 [1:25:49<5:48:42, 13.08s/it]

Episode 401/2000, real env return = 1963.40


 21%|███████▍                            | 411/2000 [1:28:00<5:47:28, 13.12s/it]

Episode 411/2000, real env return = 1512.84


 21%|███████▌                            | 421/2000 [1:30:11<5:47:15, 13.20s/it]

Episode 421/2000, real env return = 1586.03


 22%|███████▊                            | 431/2000 [1:32:23<5:41:47, 13.07s/it]

Episode 431/2000, real env return = 1547.17


 22%|███████▉                            | 441/2000 [1:34:34<5:41:32, 13.14s/it]

Episode 441/2000, real env return = 1754.34


 23%|████████                            | 451/2000 [1:36:45<5:39:29, 13.15s/it]

Episode 451/2000, real env return = 1577.64


 23%|████████▎                           | 461/2000 [1:38:56<5:35:36, 13.08s/it]

Episode 461/2000, real env return = 1727.29


 24%|████████▍                           | 471/2000 [1:41:07<5:33:40, 13.09s/it]

Episode 471/2000, real env return = 2113.31


 24%|████████▋                           | 481/2000 [1:43:18<5:31:31, 13.10s/it]

Episode 481/2000, real env return = 1836.38


 25%|████████▊                           | 491/2000 [1:45:30<5:29:52, 13.12s/it]

Episode 491/2000, real env return = 1826.55


 25%|█████████                           | 501/2000 [1:47:41<5:27:30, 13.11s/it]

Episode 501/2000, real env return = 1866.26


 26%|█████████▏                          | 511/2000 [1:49:53<5:29:12, 13.27s/it]

Episode 511/2000, real env return = 2141.34


 26%|█████████▍                          | 521/2000 [1:52:05<5:24:12, 13.15s/it]

Episode 521/2000, real env return = 2385.89


 27%|█████████▌                          | 531/2000 [1:54:16<5:22:04, 13.15s/it]

Episode 531/2000, real env return = 2003.41


 27%|█████████▋                          | 541/2000 [1:56:28<5:22:24, 13.26s/it]

Episode 541/2000, real env return = 2385.22


 28%|█████████▉                          | 551/2000 [1:58:40<5:17:42, 13.16s/it]

Episode 551/2000, real env return = 2315.11


 28%|██████████                          | 561/2000 [2:00:51<5:14:43, 13.12s/it]

Episode 561/2000, real env return = 2160.28


 29%|██████████▎                         | 571/2000 [2:03:03<5:13:09, 13.15s/it]

Episode 571/2000, real env return = 2206.16


 29%|██████████▍                         | 581/2000 [2:05:14<5:11:44, 13.18s/it]

Episode 581/2000, real env return = 2255.61


 30%|██████████▋                         | 591/2000 [2:07:26<5:08:41, 13.14s/it]

Episode 591/2000, real env return = 2430.70


 30%|██████████▊                         | 601/2000 [2:09:37<5:09:06, 13.26s/it]

Episode 601/2000, real env return = 2531.07


 31%|██████████▉                         | 611/2000 [2:11:49<5:03:40, 13.12s/it]

Episode 611/2000, real env return = 2328.99


 31%|███████████▏                        | 621/2000 [2:14:00<5:02:05, 13.14s/it]

Episode 621/2000, real env return = 2576.95


 32%|███████████▎                        | 631/2000 [2:16:11<5:01:03, 13.19s/it]

Episode 631/2000, real env return = 1840.03


 32%|███████████▌                        | 641/2000 [2:18:23<4:56:56, 13.11s/it]

Episode 641/2000, real env return = 2865.96


 33%|███████████▋                        | 651/2000 [2:20:35<4:56:16, 13.18s/it]

Episode 651/2000, real env return = 2919.71


 33%|███████████▉                        | 661/2000 [2:22:47<4:54:22, 13.19s/it]

Episode 661/2000, real env return = 2876.92


 34%|████████████                        | 671/2000 [2:24:58<4:52:31, 13.21s/it]

Episode 671/2000, real env return = 2949.67


 34%|████████████▎                       | 681/2000 [2:27:10<4:49:46, 13.18s/it]

Episode 681/2000, real env return = 3122.28


 35%|████████████▍                       | 691/2000 [2:29:21<4:45:10, 13.07s/it]

Episode 691/2000, real env return = 3064.60


 35%|████████████▌                       | 701/2000 [2:31:33<4:46:26, 13.23s/it]

Episode 701/2000, real env return = 3259.55


 36%|████████████▊                       | 711/2000 [2:33:45<4:43:33, 13.20s/it]

Episode 711/2000, real env return = 2943.91


 36%|████████████▉                       | 721/2000 [2:35:55<4:39:00, 13.09s/it]

Episode 721/2000, real env return = 3405.75


 37%|█████████████▏                      | 731/2000 [2:38:07<4:38:40, 13.18s/it]

Episode 731/2000, real env return = 3446.86


 37%|█████████████▎                      | 741/2000 [2:40:18<4:34:43, 13.09s/it]

Episode 741/2000, real env return = 3556.90


 38%|█████████████▌                      | 751/2000 [2:42:30<4:32:07, 13.07s/it]

Episode 751/2000, real env return = 3236.66


 38%|█████████████▋                      | 761/2000 [2:44:41<4:30:22, 13.09s/it]

Episode 761/2000, real env return = 3385.73


 39%|█████████████▉                      | 771/2000 [2:46:52<4:29:53, 13.18s/it]

Episode 771/2000, real env return = 3559.54


 39%|██████████████                      | 781/2000 [2:49:04<4:27:43, 13.18s/it]

Episode 781/2000, real env return = 3521.10


 40%|██████████████▏                     | 791/2000 [2:50:59<3:03:59,  9.13s/it]

Episode 791/2000, real env return = 3651.67


 40%|██████████████▍                     | 801/2000 [2:51:56<1:56:21,  5.82s/it]

Episode 801/2000, real env return = 3541.88


 41%|██████████████▌                     | 811/2000 [2:52:54<1:53:28,  5.73s/it]

Episode 811/2000, real env return = 3708.70


 41%|██████████████▊                     | 821/2000 [2:53:52<1:53:50,  5.79s/it]

Episode 821/2000, real env return = 3542.57


 42%|██████████████▉                     | 831/2000 [2:54:49<1:51:33,  5.73s/it]

Episode 831/2000, real env return = 3631.61


 42%|███████████████▏                    | 841/2000 [2:55:47<1:50:33,  5.72s/it]

Episode 841/2000, real env return = 3787.31


 43%|███████████████▎                    | 851/2000 [2:56:44<1:48:36,  5.67s/it]

Episode 851/2000, real env return = 3744.82


 43%|███████████████▍                    | 861/2000 [2:57:42<1:51:00,  5.85s/it]

Episode 861/2000, real env return = 3828.52


 44%|███████████████▋                    | 871/2000 [2:58:39<1:47:39,  5.72s/it]

Episode 871/2000, real env return = 3630.20


 44%|███████████████▊                    | 881/2000 [2:59:37<1:49:19,  5.86s/it]

Episode 881/2000, real env return = 3862.79


 45%|████████████████                    | 891/2000 [3:00:36<1:46:55,  5.78s/it]

Episode 891/2000, real env return = 3732.46


 45%|████████████████▏                   | 901/2000 [3:01:34<1:45:59,  5.79s/it]

Episode 901/2000, real env return = 4026.01


 46%|████████████████▍                   | 911/2000 [3:02:32<1:44:22,  5.75s/it]

Episode 911/2000, real env return = 3719.99


 46%|████████████████▌                   | 921/2000 [3:03:30<1:45:43,  5.88s/it]

Episode 921/2000, real env return = 3468.63


 47%|████████████████▊                   | 931/2000 [3:04:28<1:44:24,  5.86s/it]

Episode 931/2000, real env return = 3817.92


 47%|████████████████▉                   | 941/2000 [3:05:26<1:41:49,  5.77s/it]

Episode 941/2000, real env return = 3990.89


 48%|█████████████████                   | 951/2000 [3:06:23<1:42:47,  5.88s/it]

Episode 951/2000, real env return = 3681.79


 48%|█████████████████▎                  | 961/2000 [3:07:22<1:40:37,  5.81s/it]

Episode 961/2000, real env return = 3715.05


 49%|█████████████████▍                  | 971/2000 [3:08:20<1:39:11,  5.78s/it]

Episode 971/2000, real env return = 3865.86


 49%|█████████████████▋                  | 981/2000 [3:09:18<1:37:47,  5.76s/it]

Episode 981/2000, real env return = 3988.97


 50%|█████████████████▊                  | 991/2000 [3:10:15<1:36:11,  5.72s/it]

Episode 991/2000, real env return = 4049.98


 50%|█████████████████▌                 | 1001/2000 [3:11:13<1:35:49,  5.76s/it]

Episode 1001/2000, real env return = 4056.25


 51%|█████████████████▋                 | 1011/2000 [3:12:11<1:35:21,  5.79s/it]

Episode 1011/2000, real env return = 4071.48


 51%|█████████████████▊                 | 1021/2000 [3:13:08<1:33:53,  5.75s/it]

Episode 1021/2000, real env return = 4179.01


 52%|██████████████████                 | 1031/2000 [3:14:05<1:32:37,  5.74s/it]

Episode 1031/2000, real env return = 4079.15


 52%|██████████████████▏                | 1041/2000 [3:15:03<1:31:31,  5.73s/it]

Episode 1041/2000, real env return = 4219.26


 53%|██████████████████▍                | 1051/2000 [3:16:00<1:30:25,  5.72s/it]

Episode 1051/2000, real env return = 4091.10


 53%|██████████████████▌                | 1061/2000 [3:16:58<1:29:57,  5.75s/it]

Episode 1061/2000, real env return = 4152.65


 54%|██████████████████▋                | 1071/2000 [3:17:55<1:28:33,  5.72s/it]

Episode 1071/2000, real env return = 4145.40


 54%|██████████████████▉                | 1081/2000 [3:18:53<1:28:57,  5.81s/it]

Episode 1081/2000, real env return = 2406.66


 55%|███████████████████                | 1091/2000 [3:19:51<1:27:17,  5.76s/it]

Episode 1091/2000, real env return = 4146.30


 55%|███████████████████▎               | 1101/2000 [3:20:48<1:26:14,  5.76s/it]

Episode 1101/2000, real env return = 4228.88


 56%|███████████████████▍               | 1111/2000 [3:21:46<1:25:38,  5.78s/it]

Episode 1111/2000, real env return = 4103.39


 56%|███████████████████▌               | 1121/2000 [3:22:44<1:25:42,  5.85s/it]

Episode 1121/2000, real env return = 4046.87


 57%|███████████████████▊               | 1131/2000 [3:23:43<1:24:26,  5.83s/it]

Episode 1131/2000, real env return = 4064.93


 57%|███████████████████▉               | 1141/2000 [3:24:40<1:22:09,  5.74s/it]

Episode 1141/2000, real env return = 2584.49


 58%|████████████████████▏              | 1151/2000 [3:25:38<1:22:02,  5.80s/it]

Episode 1151/2000, real env return = 4345.37


 58%|████████████████████▎              | 1161/2000 [3:26:35<1:19:30,  5.69s/it]

Episode 1161/2000, real env return = 4187.72


 59%|████████████████████▍              | 1171/2000 [3:27:33<1:19:10,  5.73s/it]

Episode 1171/2000, real env return = 4185.96


 59%|████████████████████▋              | 1181/2000 [3:28:31<1:20:02,  5.86s/it]

Episode 1181/2000, real env return = 4188.03


 60%|████████████████████▊              | 1191/2000 [3:29:29<1:18:10,  5.80s/it]

Episode 1191/2000, real env return = 4295.74


 60%|█████████████████████              | 1201/2000 [3:30:28<1:17:30,  5.82s/it]

Episode 1201/2000, real env return = 4323.93


 61%|█████████████████████▏             | 1211/2000 [3:31:25<1:15:53,  5.77s/it]

Episode 1211/2000, real env return = 4278.23


 61%|█████████████████████▎             | 1221/2000 [3:32:23<1:14:51,  5.77s/it]

Episode 1221/2000, real env return = 4360.22


 62%|█████████████████████▌             | 1231/2000 [3:33:22<1:13:58,  5.77s/it]

Episode 1231/2000, real env return = 4283.27


 62%|█████████████████████▋             | 1241/2000 [3:34:19<1:12:22,  5.72s/it]

Episode 1241/2000, real env return = 4438.37


 63%|█████████████████████▉             | 1251/2000 [3:35:17<1:12:26,  5.80s/it]

Episode 1251/2000, real env return = 4345.92


 63%|██████████████████████             | 1261/2000 [3:36:15<1:11:29,  5.80s/it]

Episode 1261/2000, real env return = 4218.75


 64%|██████████████████████▏            | 1271/2000 [3:37:12<1:10:28,  5.80s/it]

Episode 1271/2000, real env return = 4457.60


 64%|██████████████████████▍            | 1281/2000 [3:38:10<1:08:53,  5.75s/it]

Episode 1281/2000, real env return = 4171.07


 65%|██████████████████████▌            | 1291/2000 [3:39:09<1:08:13,  5.77s/it]

Episode 1291/2000, real env return = 4182.36


 65%|██████████████████████▊            | 1301/2000 [3:40:06<1:07:07,  5.76s/it]

Episode 1301/2000, real env return = 4043.52


 66%|██████████████████████▉            | 1311/2000 [3:41:04<1:05:38,  5.72s/it]

Episode 1311/2000, real env return = 4413.26


 66%|███████████████████████            | 1321/2000 [3:42:02<1:05:10,  5.76s/it]

Episode 1321/2000, real env return = 3832.44


 67%|███████████████████████▎           | 1331/2000 [3:42:59<1:04:14,  5.76s/it]

Episode 1331/2000, real env return = 4213.56


 67%|███████████████████████▍           | 1341/2000 [3:43:57<1:03:58,  5.83s/it]

Episode 1341/2000, real env return = 4255.36


 68%|███████████████████████▋           | 1351/2000 [3:44:55<1:03:16,  5.85s/it]

Episode 1351/2000, real env return = 4300.88


 68%|███████████████████████▊           | 1361/2000 [3:45:52<1:01:05,  5.74s/it]

Episode 1361/2000, real env return = 4221.68


 69%|███████████████████████▉           | 1371/2000 [3:46:49<1:00:12,  5.74s/it]

Episode 1371/2000, real env return = 4526.53


 69%|█████████████████████████▌           | 1381/2000 [3:47:48<59:56,  5.81s/it]

Episode 1381/2000, real env return = 4369.64


 70%|████████████████████████▎          | 1391/2000 [3:48:47<1:00:16,  5.94s/it]

Episode 1391/2000, real env return = 4229.51


 70%|█████████████████████████▉           | 1401/2000 [3:49:45<57:11,  5.73s/it]

Episode 1401/2000, real env return = 2148.09


 71%|██████████████████████████           | 1411/2000 [3:50:44<57:26,  5.85s/it]

Episode 1411/2000, real env return = 4124.50


 71%|██████████████████████████▎          | 1421/2000 [3:51:41<55:45,  5.78s/it]

Episode 1421/2000, real env return = 4355.85


 72%|██████████████████████████▍          | 1431/2000 [3:52:39<54:46,  5.78s/it]

Episode 1431/2000, real env return = 4361.14


 72%|██████████████████████████▋          | 1441/2000 [3:53:38<54:26,  5.84s/it]

Episode 1441/2000, real env return = 4186.25


 73%|██████████████████████████▊          | 1451/2000 [3:54:36<52:52,  5.78s/it]

Episode 1451/2000, real env return = 4394.95


 73%|███████████████████████████          | 1461/2000 [3:55:34<52:28,  5.84s/it]

Episode 1461/2000, real env return = 4414.22


 74%|███████████████████████████▏         | 1471/2000 [3:56:32<51:04,  5.79s/it]

Episode 1471/2000, real env return = 4588.52


 74%|███████████████████████████▍         | 1481/2000 [3:57:30<50:01,  5.78s/it]

Episode 1481/2000, real env return = 4425.55


 75%|███████████████████████████▌         | 1491/2000 [3:58:29<49:23,  5.82s/it]

Episode 1491/2000, real env return = 4535.39


 75%|███████████████████████████▊         | 1501/2000 [3:59:27<48:06,  5.79s/it]

Episode 1501/2000, real env return = 4582.18


 76%|███████████████████████████▉         | 1511/2000 [4:00:24<46:51,  5.75s/it]

Episode 1511/2000, real env return = 4381.23


 76%|████████████████████████████▏        | 1521/2000 [4:01:22<46:21,  5.81s/it]

Episode 1521/2000, real env return = 4608.73


 77%|████████████████████████████▎        | 1531/2000 [4:02:19<44:48,  5.73s/it]

Episode 1531/2000, real env return = 4416.33


 77%|████████████████████████████▌        | 1541/2000 [4:03:18<44:27,  5.81s/it]

Episode 1541/2000, real env return = 4569.87


 78%|████████████████████████████▋        | 1551/2000 [4:04:15<42:47,  5.72s/it]

Episode 1551/2000, real env return = 4737.57


 78%|████████████████████████████▉        | 1561/2000 [4:05:13<42:59,  5.88s/it]

Episode 1561/2000, real env return = 4540.71


 79%|█████████████████████████████        | 1571/2000 [4:06:11<41:02,  5.74s/it]

Episode 1571/2000, real env return = 4612.73


 79%|█████████████████████████████▏       | 1581/2000 [4:07:09<40:19,  5.77s/it]

Episode 1581/2000, real env return = 4784.33


 80%|█████████████████████████████▍       | 1591/2000 [4:08:06<39:08,  5.74s/it]

Episode 1591/2000, real env return = 4588.95


 80%|█████████████████████████████▌       | 1601/2000 [4:09:03<37:49,  5.69s/it]

Episode 1601/2000, real env return = 4490.51


 81%|█████████████████████████████▊       | 1611/2000 [4:10:01<37:15,  5.75s/it]

Episode 1611/2000, real env return = 4677.79


 81%|█████████████████████████████▉       | 1621/2000 [4:10:58<36:10,  5.73s/it]

Episode 1621/2000, real env return = 4618.37


 82%|██████████████████████████████▏      | 1631/2000 [4:11:56<35:12,  5.72s/it]

Episode 1631/2000, real env return = 4674.60


 82%|██████████████████████████████▎      | 1641/2000 [4:12:53<34:12,  5.72s/it]

Episode 1641/2000, real env return = 4657.86


 83%|██████████████████████████████▌      | 1651/2000 [4:13:52<34:06,  5.87s/it]

Episode 1651/2000, real env return = 4561.33


 83%|██████████████████████████████▋      | 1661/2000 [4:14:51<33:05,  5.86s/it]

Episode 1661/2000, real env return = 4650.18


 84%|██████████████████████████████▉      | 1671/2000 [4:15:48<31:31,  5.75s/it]

Episode 1671/2000, real env return = 4609.79


 84%|███████████████████████████████      | 1681/2000 [4:16:46<31:15,  5.88s/it]

Episode 1681/2000, real env return = 4869.03


 85%|███████████████████████████████▎     | 1691/2000 [4:17:45<29:55,  5.81s/it]

Episode 1691/2000, real env return = 4430.41


 85%|███████████████████████████████▍     | 1701/2000 [4:18:43<28:43,  5.76s/it]

Episode 1701/2000, real env return = 4728.30


 86%|███████████████████████████████▋     | 1711/2000 [4:19:41<28:10,  5.85s/it]

Episode 1711/2000, real env return = 4665.06


 86%|███████████████████████████████▊     | 1721/2000 [4:20:39<26:39,  5.73s/it]

Episode 1721/2000, real env return = 4851.65


 87%|████████████████████████████████     | 1731/2000 [4:21:37<25:50,  5.76s/it]

Episode 1731/2000, real env return = 4772.78


 87%|████████████████████████████████▏    | 1741/2000 [4:22:34<24:54,  5.77s/it]

Episode 1741/2000, real env return = 4981.21


 88%|████████████████████████████████▍    | 1751/2000 [4:23:32<24:00,  5.78s/it]

Episode 1751/2000, real env return = 4767.11


 88%|████████████████████████████████▌    | 1761/2000 [4:24:30<23:12,  5.83s/it]

Episode 1761/2000, real env return = 4847.58


 89%|████████████████████████████████▊    | 1771/2000 [4:25:28<21:55,  5.74s/it]

Episode 1771/2000, real env return = 5029.35


 89%|████████████████████████████████▉    | 1781/2000 [4:26:26<21:20,  5.85s/it]

Episode 1781/2000, real env return = 4925.48


 90%|█████████████████████████████████▏   | 1791/2000 [4:27:24<20:07,  5.78s/it]

Episode 1791/2000, real env return = 5175.58


 90%|█████████████████████████████████▎   | 1801/2000 [4:28:22<19:30,  5.88s/it]

Episode 1801/2000, real env return = 5194.45


 91%|█████████████████████████████████▌   | 1811/2000 [4:29:21<18:32,  5.88s/it]

Episode 1811/2000, real env return = 4869.20


 91%|█████████████████████████████████▋   | 1821/2000 [4:30:19<17:34,  5.89s/it]

Episode 1821/2000, real env return = 5153.88


 92%|█████████████████████████████████▊   | 1831/2000 [4:31:17<16:11,  5.75s/it]

Episode 1831/2000, real env return = 4948.97


 92%|██████████████████████████████████   | 1841/2000 [4:32:15<15:23,  5.81s/it]

Episode 1841/2000, real env return = 5279.54


 93%|██████████████████████████████████▏  | 1851/2000 [4:33:13<14:12,  5.72s/it]

Episode 1851/2000, real env return = 5142.68


 93%|██████████████████████████████████▍  | 1861/2000 [4:34:10<13:15,  5.73s/it]

Episode 1861/2000, real env return = 5150.66


 94%|██████████████████████████████████▌  | 1871/2000 [4:35:08<12:15,  5.70s/it]

Episode 1871/2000, real env return = 5245.74


 94%|██████████████████████████████████▊  | 1881/2000 [4:36:06<11:24,  5.75s/it]

Episode 1881/2000, real env return = 4783.23


 95%|██████████████████████████████████▉  | 1891/2000 [4:37:04<10:31,  5.79s/it]

Episode 1891/2000, real env return = 5082.99


 95%|███████████████████████████████████▏ | 1901/2000 [4:38:03<09:47,  5.93s/it]

Episode 1901/2000, real env return = 5456.64


 96%|███████████████████████████████████▎ | 1911/2000 [4:39:01<08:29,  5.72s/it]

Episode 1911/2000, real env return = 5209.04


 96%|███████████████████████████████████▌ | 1921/2000 [4:39:59<07:44,  5.87s/it]

Episode 1921/2000, real env return = 5313.11


 97%|███████████████████████████████████▋ | 1931/2000 [4:40:58<06:44,  5.86s/it]

Episode 1931/2000, real env return = 5362.43


 97%|███████████████████████████████████▉ | 1941/2000 [4:41:56<05:39,  5.76s/it]

Episode 1941/2000, real env return = 5479.12


 98%|████████████████████████████████████ | 1951/2000 [4:42:54<04:44,  5.80s/it]

Episode 1951/2000, real env return = 5478.02


 98%|████████████████████████████████████▎| 1961/2000 [4:43:53<03:51,  5.93s/it]

Episode 1961/2000, real env return = 5298.89


 99%|████████████████████████████████████▍| 1971/2000 [4:44:52<02:53,  5.98s/it]

Episode 1971/2000, real env return = 5640.47


 99%|████████████████████████████████████▋| 1981/2000 [4:45:51<01:52,  5.91s/it]

Episode 1981/2000, real env return = 5544.44


100%|████████████████████████████████████▊| 1991/2000 [4:46:49<00:51,  5.73s/it]

Episode 1991/2000, real env return = 5520.68


100%|█████████████████████████████████████| 2000/2000 [4:47:41<00:00,  8.63s/it]


Training finished.


In [9]:
train4 = main()

Using cuda device


  0%|                                          | 1/2000 [00:00<11:06,  3.00it/s]

Episode 1/2000, real env return = -355.47


  1%|▏                                      | 11/2000 [00:29<2:54:20,  5.26s/it]

Episode 11/2000, real env return = -279.60


  1%|▍                                      | 21/2000 [01:48<5:27:12,  9.92s/it]

Episode 21/2000, real env return = -221.68


  2%|▌                                      | 31/2000 [03:58<7:03:14, 12.90s/it]

Episode 31/2000, real env return = -364.61


  2%|▊                                      | 41/2000 [06:08<7:04:35, 13.00s/it]

Episode 41/2000, real env return = -385.01


  3%|▉                                      | 51/2000 [08:18<7:03:39, 13.04s/it]

Episode 51/2000, real env return = -488.83


  3%|█▏                                     | 61/2000 [10:29<7:01:24, 13.04s/it]

Episode 61/2000, real env return = -276.11


  4%|█▍                                     | 71/2000 [12:39<6:59:08, 13.04s/it]

Episode 71/2000, real env return = -375.82


  4%|█▌                                     | 81/2000 [14:49<6:56:20, 13.02s/it]

Episode 81/2000, real env return = -410.25


  5%|█▊                                     | 91/2000 [17:00<6:54:27, 13.03s/it]

Episode 91/2000, real env return = -433.20


  5%|█▉                                    | 101/2000 [19:10<6:53:29, 13.06s/it]

Episode 101/2000, real env return = -339.73


  6%|██                                    | 111/2000 [21:22<6:52:42, 13.11s/it]

Episode 111/2000, real env return = -199.56


  6%|██▎                                   | 121/2000 [23:33<6:48:05, 13.03s/it]

Episode 121/2000, real env return = -155.12


  7%|██▍                                   | 131/2000 [25:44<6:49:15, 13.14s/it]

Episode 131/2000, real env return = -341.79


  7%|██▋                                   | 141/2000 [27:55<6:44:24, 13.05s/it]

Episode 141/2000, real env return = -214.57


  8%|██▊                                   | 151/2000 [30:06<6:43:36, 13.10s/it]

Episode 151/2000, real env return = -143.93


  8%|███                                   | 161/2000 [32:17<6:41:31, 13.10s/it]

Episode 161/2000, real env return = -242.88


  9%|███▏                                  | 171/2000 [34:28<6:38:12, 13.06s/it]

Episode 171/2000, real env return = -113.17


  9%|███▍                                  | 181/2000 [36:39<6:36:48, 13.09s/it]

Episode 181/2000, real env return = 92.53


 10%|███▋                                  | 191/2000 [38:50<6:33:54, 13.06s/it]

Episode 191/2000, real env return = -173.08


 10%|███▊                                  | 201/2000 [41:01<6:32:24, 13.09s/it]

Episode 201/2000, real env return = 326.80


 11%|████                                  | 211/2000 [43:12<6:29:20, 13.06s/it]

Episode 211/2000, real env return = -465.01


 11%|████▏                                 | 221/2000 [45:23<6:29:07, 13.12s/it]

Episode 221/2000, real env return = -371.37


 12%|████▍                                 | 231/2000 [47:34<6:24:34, 13.04s/it]

Episode 231/2000, real env return = -205.26


 12%|████▌                                 | 241/2000 [49:46<6:24:50, 13.13s/it]

Episode 241/2000, real env return = 157.53


 13%|████▊                                 | 251/2000 [51:57<6:22:11, 13.11s/it]

Episode 251/2000, real env return = -331.28


 13%|████▉                                 | 261/2000 [54:08<6:19:27, 13.09s/it]

Episode 261/2000, real env return = -169.54


 14%|█████▏                                | 271/2000 [56:19<6:19:35, 13.17s/it]

Episode 271/2000, real env return = 354.91


 14%|█████▎                                | 281/2000 [58:30<6:16:05, 13.13s/it]

Episode 281/2000, real env return = -94.01


 15%|█████▏                              | 291/2000 [1:00:41<6:13:35, 13.12s/it]

Episode 291/2000, real env return = -92.71


 15%|█████▍                              | 301/2000 [1:02:53<6:11:26, 13.12s/it]

Episode 301/2000, real env return = -4.41


 16%|█████▌                              | 311/2000 [1:05:04<6:08:17, 13.08s/it]

Episode 311/2000, real env return = 1081.50


 16%|█████▊                              | 321/2000 [1:07:15<6:06:52, 13.11s/it]

Episode 321/2000, real env return = 1348.12


 17%|█████▉                              | 331/2000 [1:09:26<6:05:03, 13.12s/it]

Episode 331/2000, real env return = 686.41


 17%|██████▏                             | 341/2000 [1:11:37<6:02:18, 13.10s/it]

Episode 341/2000, real env return = 602.77


 18%|██████▎                             | 351/2000 [1:13:49<6:00:32, 13.12s/it]

Episode 351/2000, real env return = 668.52


 18%|██████▍                             | 361/2000 [1:16:00<5:58:28, 13.12s/it]

Episode 361/2000, real env return = 511.49


 19%|██████▋                             | 371/2000 [1:18:12<5:57:26, 13.17s/it]

Episode 371/2000, real env return = 500.59


 19%|██████▊                             | 381/2000 [1:20:23<5:54:54, 13.15s/it]

Episode 381/2000, real env return = 801.21


 20%|███████                             | 391/2000 [1:22:35<5:52:33, 13.15s/it]

Episode 391/2000, real env return = 635.66


 20%|███████▏                            | 401/2000 [1:24:47<5:51:15, 13.18s/it]

Episode 401/2000, real env return = 674.55


 21%|███████▍                            | 411/2000 [1:26:58<5:48:31, 13.16s/it]

Episode 411/2000, real env return = 620.94


 21%|███████▌                            | 421/2000 [1:29:10<5:46:08, 13.15s/it]

Episode 421/2000, real env return = 748.40


 22%|███████▊                            | 431/2000 [1:31:22<5:46:27, 13.25s/it]

Episode 431/2000, real env return = 909.58


 22%|███████▉                            | 441/2000 [1:33:33<5:40:14, 13.09s/it]

Episode 441/2000, real env return = 1036.53


 23%|████████                            | 451/2000 [1:35:45<5:40:37, 13.19s/it]

Episode 451/2000, real env return = 1195.01


 23%|████████▎                           | 461/2000 [1:37:57<5:35:49, 13.09s/it]

Episode 461/2000, real env return = -120.29


 24%|████████▍                           | 471/2000 [1:40:08<5:35:05, 13.15s/it]

Episode 471/2000, real env return = 1432.84


 24%|████████▋                           | 481/2000 [1:42:20<5:32:36, 13.14s/it]

Episode 481/2000, real env return = 1851.45


 25%|████████▊                           | 491/2000 [1:44:31<5:30:24, 13.14s/it]

Episode 491/2000, real env return = 116.72


 25%|█████████                           | 501/2000 [1:46:43<5:29:40, 13.20s/it]

Episode 501/2000, real env return = -178.89


 26%|█████████▏                          | 511/2000 [1:48:55<5:26:24, 13.15s/it]

Episode 511/2000, real env return = 2675.26


 26%|█████████▍                          | 521/2000 [1:51:06<5:22:59, 13.10s/it]

Episode 521/2000, real env return = 2811.00


 27%|█████████▌                          | 531/2000 [1:53:18<5:22:01, 13.15s/it]

Episode 531/2000, real env return = 1007.11


 27%|█████████▋                          | 541/2000 [1:55:29<5:19:48, 13.15s/it]

Episode 541/2000, real env return = 3014.18


 28%|█████████▉                          | 551/2000 [1:57:41<5:18:14, 13.18s/it]

Episode 551/2000, real env return = 3063.36


 28%|██████████                          | 561/2000 [1:59:53<5:16:39, 13.20s/it]

Episode 561/2000, real env return = 3500.90


 29%|██████████▎                         | 571/2000 [2:02:05<5:14:48, 13.22s/it]

Episode 571/2000, real env return = 3214.89


 29%|██████████▍                         | 581/2000 [2:04:17<5:10:31, 13.13s/it]

Episode 581/2000, real env return = 3759.58


 30%|██████████▋                         | 591/2000 [2:06:29<5:09:56, 13.20s/it]

Episode 591/2000, real env return = 2495.28


 30%|██████████▊                         | 601/2000 [2:08:41<5:07:49, 13.20s/it]

Episode 601/2000, real env return = 3635.86


 31%|██████████▉                         | 611/2000 [2:10:53<5:04:13, 13.14s/it]

Episode 611/2000, real env return = 3713.07


 31%|███████████▏                        | 621/2000 [2:13:04<5:02:33, 13.16s/it]

Episode 621/2000, real env return = 3678.36


 32%|███████████▎                        | 631/2000 [2:15:16<5:00:59, 13.19s/it]

Episode 631/2000, real env return = 3838.44


 32%|███████████▌                        | 641/2000 [2:17:28<4:58:10, 13.16s/it]

Episode 641/2000, real env return = 3460.95


 33%|███████████▋                        | 651/2000 [2:19:39<4:56:01, 13.17s/it]

Episode 651/2000, real env return = 3848.27


 33%|███████████▉                        | 661/2000 [2:21:51<4:53:29, 13.15s/it]

Episode 661/2000, real env return = 3624.97


 34%|████████████                        | 671/2000 [2:24:03<4:52:11, 13.19s/it]

Episode 671/2000, real env return = 3878.87


 34%|████████████▎                       | 681/2000 [2:26:15<4:49:38, 13.18s/it]

Episode 681/2000, real env return = 3965.03


 35%|████████████▍                       | 691/2000 [2:28:26<4:46:45, 13.14s/it]

Episode 691/2000, real env return = 3691.07


 35%|████████████▌                       | 701/2000 [2:30:38<4:45:34, 13.19s/it]

Episode 701/2000, real env return = 3955.40


 36%|████████████▊                       | 711/2000 [2:32:51<4:43:43, 13.21s/it]

Episode 711/2000, real env return = 3835.39


 36%|████████████▉                       | 721/2000 [2:35:03<4:41:33, 13.21s/it]

Episode 721/2000, real env return = 3886.20


 37%|█████████████▏                      | 731/2000 [2:37:15<4:39:07, 13.20s/it]

Episode 731/2000, real env return = 3814.50


 37%|█████████████▎                      | 741/2000 [2:39:27<4:36:08, 13.16s/it]

Episode 741/2000, real env return = 3732.94


 38%|█████████████▌                      | 751/2000 [2:41:39<4:35:27, 13.23s/it]

Episode 751/2000, real env return = 949.75


 38%|█████████████▋                      | 761/2000 [2:43:51<4:31:46, 13.16s/it]

Episode 761/2000, real env return = 4098.43


 39%|█████████████▉                      | 771/2000 [2:46:03<4:29:28, 13.16s/it]

Episode 771/2000, real env return = 3893.51


 39%|██████████████                      | 781/2000 [2:48:15<4:27:55, 13.19s/it]

Episode 781/2000, real env return = 3975.53


 40%|██████████████▏                     | 791/2000 [2:50:26<4:25:56, 13.20s/it]

Episode 791/2000, real env return = 4004.39


 40%|██████████████▍                     | 801/2000 [2:52:04<2:26:45,  7.34s/it]

Episode 801/2000, real env return = 3953.89


 41%|██████████████▌                     | 811/2000 [2:53:05<2:00:55,  6.10s/it]

Episode 811/2000, real env return = 4140.47


 41%|██████████████▊                     | 821/2000 [2:54:04<1:56:27,  5.93s/it]

Episode 821/2000, real env return = 4192.78


 42%|██████████████▉                     | 831/2000 [2:55:03<1:55:11,  5.91s/it]

Episode 831/2000, real env return = 4159.53


 42%|███████████████▏                    | 841/2000 [2:56:02<1:54:01,  5.90s/it]

Episode 841/2000, real env return = 4124.58


 43%|███████████████▎                    | 851/2000 [2:57:01<1:54:31,  5.98s/it]

Episode 851/2000, real env return = 3772.31


 43%|███████████████▍                    | 861/2000 [2:58:02<1:55:21,  6.08s/it]

Episode 861/2000, real env return = 4249.21


 44%|███████████████▋                    | 871/2000 [2:59:00<1:50:46,  5.89s/it]

Episode 871/2000, real env return = 4011.39


 44%|███████████████▊                    | 881/2000 [2:59:59<1:50:31,  5.93s/it]

Episode 881/2000, real env return = 4340.05


 45%|████████████████                    | 891/2000 [3:00:59<1:49:13,  5.91s/it]

Episode 891/2000, real env return = 4339.37


 45%|████████████████▏                   | 901/2000 [3:01:57<1:46:03,  5.79s/it]

Episode 901/2000, real env return = 4225.24


 46%|████████████████▍                   | 911/2000 [3:02:55<1:45:45,  5.83s/it]

Episode 911/2000, real env return = 4116.50


 46%|████████████████▌                   | 921/2000 [3:03:54<1:45:24,  5.86s/it]

Episode 921/2000, real env return = 4206.60


 47%|████████████████▊                   | 931/2000 [3:04:53<1:45:02,  5.90s/it]

Episode 931/2000, real env return = 4009.02


 47%|████████████████▉                   | 941/2000 [3:05:51<1:42:25,  5.80s/it]

Episode 941/2000, real env return = 4072.46


 48%|█████████████████                   | 951/2000 [3:06:50<1:42:20,  5.85s/it]

Episode 951/2000, real env return = 3952.52


 48%|█████████████████▎                  | 961/2000 [3:07:49<1:42:43,  5.93s/it]

Episode 961/2000, real env return = 4090.45


 49%|█████████████████▍                  | 971/2000 [3:08:50<1:42:57,  6.00s/it]

Episode 971/2000, real env return = 4065.57


 49%|█████████████████▋                  | 981/2000 [3:09:49<1:39:57,  5.89s/it]

Episode 981/2000, real env return = 4220.37


 50%|█████████████████▊                  | 991/2000 [3:10:47<1:38:22,  5.85s/it]

Episode 991/2000, real env return = 4222.71


 50%|█████████████████▌                 | 1001/2000 [3:11:47<1:38:19,  5.91s/it]

Episode 1001/2000, real env return = 4360.03


 51%|█████████████████▋                 | 1011/2000 [3:12:46<1:36:49,  5.87s/it]

Episode 1011/2000, real env return = 4229.11


 51%|█████████████████▊                 | 1021/2000 [3:13:45<1:35:35,  5.86s/it]

Episode 1021/2000, real env return = 4072.80


 52%|██████████████████                 | 1031/2000 [3:14:44<1:35:59,  5.94s/it]

Episode 1031/2000, real env return = 4324.55


 52%|██████████████████▏                | 1041/2000 [3:15:42<1:33:36,  5.86s/it]

Episode 1041/2000, real env return = 4347.20


 53%|██████████████████▍                | 1051/2000 [3:16:41<1:33:46,  5.93s/it]

Episode 1051/2000, real env return = 4217.17


 53%|██████████████████▌                | 1061/2000 [3:17:40<1:32:42,  5.92s/it]

Episode 1061/2000, real env return = 4514.38


 54%|██████████████████▋                | 1071/2000 [3:18:39<1:30:47,  5.86s/it]

Episode 1071/2000, real env return = 4339.56


 54%|██████████████████▉                | 1081/2000 [3:19:37<1:30:08,  5.89s/it]

Episode 1081/2000, real env return = 4261.31


 55%|███████████████████                | 1091/2000 [3:20:36<1:28:24,  5.84s/it]

Episode 1091/2000, real env return = 4471.06


 55%|███████████████████▎               | 1101/2000 [3:21:35<1:28:44,  5.92s/it]

Episode 1101/2000, real env return = 3750.10


 56%|███████████████████▍               | 1111/2000 [3:22:34<1:26:49,  5.86s/it]

Episode 1111/2000, real env return = 4109.90


 56%|███████████████████▌               | 1121/2000 [3:23:33<1:26:22,  5.90s/it]

Episode 1121/2000, real env return = 3885.85


 57%|███████████████████▊               | 1131/2000 [3:24:32<1:25:38,  5.91s/it]

Episode 1131/2000, real env return = 3884.69


 57%|███████████████████▉               | 1141/2000 [3:25:31<1:24:01,  5.87s/it]

Episode 1141/2000, real env return = 4277.19


 58%|████████████████████▏              | 1151/2000 [3:26:30<1:22:47,  5.85s/it]

Episode 1151/2000, real env return = 3415.19


 58%|████████████████████▎              | 1161/2000 [3:27:30<1:22:36,  5.91s/it]

Episode 1161/2000, real env return = 4094.07


 59%|████████████████████▍              | 1171/2000 [3:28:29<1:20:59,  5.86s/it]

Episode 1171/2000, real env return = 4576.26


 59%|████████████████████▋              | 1181/2000 [3:29:29<1:21:29,  5.97s/it]

Episode 1181/2000, real env return = 4379.98


 60%|████████████████████▊              | 1191/2000 [3:30:28<1:19:30,  5.90s/it]

Episode 1191/2000, real env return = 4381.69


 60%|█████████████████████              | 1201/2000 [3:31:27<1:17:50,  5.84s/it]

Episode 1201/2000, real env return = 4470.33


 61%|█████████████████████▏             | 1211/2000 [3:32:25<1:16:08,  5.79s/it]

Episode 1211/2000, real env return = 4441.24


 61%|█████████████████████▎             | 1221/2000 [3:33:24<1:16:06,  5.86s/it]

Episode 1221/2000, real env return = 4346.82


 62%|█████████████████████▌             | 1231/2000 [3:34:24<1:17:36,  6.06s/it]

Episode 1231/2000, real env return = 4518.59


 62%|█████████████████████▋             | 1241/2000 [3:35:24<1:14:42,  5.91s/it]

Episode 1241/2000, real env return = 4362.34


 63%|█████████████████████▉             | 1251/2000 [3:36:23<1:13:28,  5.89s/it]

Episode 1251/2000, real env return = 4279.01


 63%|██████████████████████             | 1261/2000 [3:37:22<1:12:29,  5.89s/it]

Episode 1261/2000, real env return = 4005.25


 64%|██████████████████████▏            | 1271/2000 [3:38:22<1:11:58,  5.92s/it]

Episode 1271/2000, real env return = 4601.55


 64%|██████████████████████▍            | 1281/2000 [3:39:21<1:10:39,  5.90s/it]

Episode 1281/2000, real env return = 4556.33


 65%|██████████████████████▌            | 1291/2000 [3:40:20<1:10:16,  5.95s/it]

Episode 1291/2000, real env return = 4247.46


 65%|██████████████████████▊            | 1301/2000 [3:41:19<1:08:24,  5.87s/it]

Episode 1301/2000, real env return = 4527.46


 66%|██████████████████████▉            | 1311/2000 [3:42:19<1:09:02,  6.01s/it]

Episode 1311/2000, real env return = 4467.79


 66%|███████████████████████            | 1321/2000 [3:43:19<1:06:54,  5.91s/it]

Episode 1321/2000, real env return = 4558.12


 67%|███████████████████████▎           | 1331/2000 [3:44:17<1:05:17,  5.86s/it]

Episode 1331/2000, real env return = 4537.92


 67%|███████████████████████▍           | 1341/2000 [3:45:16<1:04:45,  5.90s/it]

Episode 1341/2000, real env return = 4728.85


 68%|███████████████████████▋           | 1351/2000 [3:46:15<1:03:59,  5.92s/it]

Episode 1351/2000, real env return = 4527.37


 68%|███████████████████████▊           | 1361/2000 [3:47:14<1:02:35,  5.88s/it]

Episode 1361/2000, real env return = 4573.00


 69%|███████████████████████▉           | 1371/2000 [3:48:18<1:09:59,  6.68s/it]

Episode 1371/2000, real env return = 4639.42


 69%|████████████████████████▏          | 1381/2000 [3:49:17<1:00:57,  5.91s/it]

Episode 1381/2000, real env return = 4509.89


 70%|█████████████████████████▋           | 1391/2000 [3:50:16<59:34,  5.87s/it]

Episode 1391/2000, real env return = 4379.77


 70%|█████████████████████████▉           | 1401/2000 [3:51:16<59:07,  5.92s/it]

Episode 1401/2000, real env return = 4534.48


 71%|████████████████████████▋          | 1411/2000 [3:52:18<1:00:24,  6.15s/it]

Episode 1411/2000, real env return = 4631.33


 71%|██████████████████████████▎          | 1421/2000 [3:53:17<56:49,  5.89s/it]

Episode 1421/2000, real env return = 4561.86


 72%|██████████████████████████▍          | 1431/2000 [3:54:16<55:49,  5.89s/it]

Episode 1431/2000, real env return = 4524.70


 72%|██████████████████████████▋          | 1441/2000 [3:55:16<56:24,  6.05s/it]

Episode 1441/2000, real env return = 4690.97


 73%|██████████████████████████▊          | 1451/2000 [3:56:19<57:39,  6.30s/it]

Episode 1451/2000, real env return = 4540.70


 73%|███████████████████████████          | 1461/2000 [3:57:21<55:38,  6.19s/it]

Episode 1461/2000, real env return = 4752.45


 74%|███████████████████████████▏         | 1471/2000 [3:58:23<54:51,  6.22s/it]

Episode 1471/2000, real env return = 4378.82


 74%|█████████████████████████▉         | 1481/2000 [4:00:23<1:52:54, 13.05s/it]

Episode 1481/2000, real env return = 4676.73


 75%|██████████████████████████         | 1491/2000 [4:03:28<2:35:52, 18.37s/it]

Episode 1491/2000, real env return = 4345.40


 75%|██████████████████████████▎        | 1501/2000 [4:06:34<2:34:07, 18.53s/it]

Episode 1501/2000, real env return = 4620.32


 76%|██████████████████████████▍        | 1511/2000 [4:09:39<2:30:43, 18.49s/it]

Episode 1511/2000, real env return = 4639.46


 76%|██████████████████████████▌        | 1521/2000 [4:12:44<2:27:33, 18.48s/it]

Episode 1521/2000, real env return = 4576.70


 77%|██████████████████████████▊        | 1531/2000 [4:15:51<2:25:39, 18.63s/it]

Episode 1531/2000, real env return = 4389.84


 77%|██████████████████████████▉        | 1541/2000 [4:18:58<2:24:07, 18.84s/it]

Episode 1541/2000, real env return = 4523.36


 78%|███████████████████████████▏       | 1551/2000 [4:22:03<2:18:25, 18.50s/it]

Episode 1551/2000, real env return = 4647.12


 78%|███████████████████████████▎       | 1561/2000 [4:25:09<2:16:54, 18.71s/it]

Episode 1561/2000, real env return = 4741.96


 79%|███████████████████████████▍       | 1571/2000 [4:28:15<2:12:58, 18.60s/it]

Episode 1571/2000, real env return = 4771.49


 79%|███████████████████████████▋       | 1581/2000 [4:31:20<2:09:46, 18.58s/it]

Episode 1581/2000, real env return = 4710.84


 80%|███████████████████████████▊       | 1591/2000 [4:34:26<2:06:15, 18.52s/it]

Episode 1591/2000, real env return = 4395.97


 80%|████████████████████████████       | 1601/2000 [4:37:32<2:03:07, 18.51s/it]

Episode 1601/2000, real env return = 4364.40


 81%|████████████████████████████▏      | 1611/2000 [4:40:38<2:00:15, 18.55s/it]

Episode 1611/2000, real env return = 4487.67


 81%|████████████████████████████▎      | 1621/2000 [4:43:43<1:56:51, 18.50s/it]

Episode 1621/2000, real env return = 4634.07


 82%|████████████████████████████▌      | 1631/2000 [4:46:49<1:53:53, 18.52s/it]

Episode 1631/2000, real env return = 4521.62


 82%|████████████████████████████▋      | 1641/2000 [4:49:54<1:51:04, 18.56s/it]

Episode 1641/2000, real env return = 4589.39


 83%|████████████████████████████▉      | 1651/2000 [4:52:59<1:47:22, 18.46s/it]

Episode 1651/2000, real env return = 4529.46


 83%|█████████████████████████████      | 1661/2000 [4:55:34<1:16:58, 13.62s/it]

Episode 1661/2000, real env return = 4511.79


 84%|█████████████████████████████▏     | 1671/2000 [4:57:41<1:09:45, 12.72s/it]

Episode 1671/2000, real env return = 4534.36


 84%|█████████████████████████████▍     | 1681/2000 [4:59:49<1:07:54, 12.77s/it]

Episode 1681/2000, real env return = 4725.30


 85%|█████████████████████████████▌     | 1691/2000 [5:01:56<1:05:30, 12.72s/it]

Episode 1691/2000, real env return = 4555.73


 85%|█████████████████████████████▊     | 1701/2000 [5:04:03<1:03:22, 12.72s/it]

Episode 1701/2000, real env return = 2561.78


 86%|█████████████████████████████▉     | 1711/2000 [5:06:11<1:01:23, 12.75s/it]

Episode 1711/2000, real env return = 4473.80


 86%|███████████████████████████████▊     | 1721/2000 [5:08:18<59:19, 12.76s/it]

Episode 1721/2000, real env return = 4723.14


 87%|████████████████████████████████     | 1731/2000 [5:10:25<56:45, 12.66s/it]

Episode 1731/2000, real env return = 4866.28


 87%|████████████████████████████████▏    | 1741/2000 [5:12:32<54:34, 12.64s/it]

Episode 1741/2000, real env return = 4745.57


 88%|████████████████████████████████▍    | 1751/2000 [5:14:41<52:48, 12.73s/it]

Episode 1751/2000, real env return = 4735.39


 88%|██████████████████████████████▊    | 1761/2000 [5:17:24<1:11:00, 17.83s/it]

Episode 1761/2000, real env return = 4946.01


 89%|██████████████████████████████▉    | 1771/2000 [5:20:29<1:10:22, 18.44s/it]

Episode 1771/2000, real env return = 4722.96


 89%|███████████████████████████████▏   | 1781/2000 [5:23:35<1:07:40, 18.54s/it]

Episode 1781/2000, real env return = 4977.91


 90%|███████████████████████████████▎   | 1791/2000 [5:26:41<1:04:35, 18.54s/it]

Episode 1791/2000, real env return = 4785.30


 90%|█████████████████████████████████▎   | 1801/2000 [5:29:11<45:00, 13.57s/it]

Episode 1801/2000, real env return = 4572.44


 91%|█████████████████████████████████▌   | 1811/2000 [5:31:20<40:51, 12.97s/it]

Episode 1811/2000, real env return = 4693.12


 91%|█████████████████████████████████▋   | 1821/2000 [5:33:30<38:38, 12.95s/it]

Episode 1821/2000, real env return = 4741.11


 92%|█████████████████████████████████▊   | 1831/2000 [5:35:40<36:29, 12.96s/it]

Episode 1831/2000, real env return = 4863.72


 92%|██████████████████████████████████   | 1841/2000 [5:37:50<34:28, 13.01s/it]

Episode 1841/2000, real env return = 4817.03


 93%|██████████████████████████████████▏  | 1851/2000 [5:40:00<32:16, 12.99s/it]

Episode 1851/2000, real env return = 4813.48


 93%|██████████████████████████████████▍  | 1861/2000 [5:42:09<29:54, 12.91s/it]

Episode 1861/2000, real env return = 4426.45


 94%|██████████████████████████████████▌  | 1871/2000 [5:44:18<27:49, 12.94s/it]

Episode 1871/2000, real env return = 4609.42


 94%|██████████████████████████████████▊  | 1881/2000 [5:46:28<25:40, 12.95s/it]

Episode 1881/2000, real env return = 4806.87


 95%|██████████████████████████████████▉  | 1891/2000 [5:48:38<23:32, 12.96s/it]

Episode 1891/2000, real env return = 4767.85


 95%|███████████████████████████████████▏ | 1901/2000 [5:50:48<21:25, 12.98s/it]

Episode 1901/2000, real env return = 4661.06


 96%|███████████████████████████████████▎ | 1911/2000 [5:52:57<19:10, 12.93s/it]

Episode 1911/2000, real env return = 4715.16


 96%|███████████████████████████████████▌ | 1921/2000 [5:55:07<17:03, 12.96s/it]

Episode 1921/2000, real env return = 4739.45


 97%|███████████████████████████████████▋ | 1931/2000 [5:57:16<14:52, 12.94s/it]

Episode 1931/2000, real env return = 4843.38


 97%|███████████████████████████████████▉ | 1941/2000 [5:59:26<12:45, 12.98s/it]

Episode 1941/2000, real env return = 4799.90


 98%|████████████████████████████████████ | 1951/2000 [6:01:36<10:34, 12.95s/it]

Episode 1951/2000, real env return = 4853.89


 98%|████████████████████████████████████▎| 1961/2000 [6:03:45<08:24, 12.94s/it]

Episode 1961/2000, real env return = 4958.34


 99%|████████████████████████████████████▍| 1971/2000 [6:05:55<06:14, 12.92s/it]

Episode 1971/2000, real env return = 4839.59


 99%|████████████████████████████████████▋| 1981/2000 [6:08:04<04:06, 12.95s/it]

Episode 1981/2000, real env return = 4661.94


100%|████████████████████████████████████▊| 1991/2000 [6:10:14<01:56, 12.97s/it]

Episode 1991/2000, real env return = 4670.08


100%|█████████████████████████████████████| 2000/2000 [6:12:11<00:00, 11.17s/it]


Training finished.


In [11]:
train5 = main()

Using cuda device


  0%|                                          | 1/2000 [00:01<38:57,  1.17s/it]

Episode 1/2000, real env return = -293.40


  1%|▏                                      | 11/2000 [01:23<8:14:43, 14.92s/it]

Episode 11/2000, real env return = -395.94


  1%|▍                                     | 21/2000 [04:32<10:17:33, 18.72s/it]

Episode 21/2000, real env return = -355.21


  2%|▌                                     | 31/2000 [07:40<10:17:24, 18.81s/it]

Episode 31/2000, real env return = -357.62


  2%|▊                                     | 41/2000 [10:48<10:13:53, 18.80s/it]

Episode 41/2000, real env return = -454.86


  3%|▉                                     | 51/2000 [13:57<10:12:10, 18.85s/it]

Episode 51/2000, real env return = -280.75


  3%|█▏                                    | 61/2000 [17:05<10:05:06, 18.72s/it]

Episode 61/2000, real env return = -246.10


  4%|█▎                                    | 71/2000 [20:12<10:02:08, 18.73s/it]

Episode 71/2000, real env return = -413.96


  4%|█▌                                     | 81/2000 [23:20<9:57:25, 18.68s/it]

Episode 81/2000, real env return = -339.41


  5%|█▊                                     | 91/2000 [26:27<9:54:42, 18.69s/it]

Episode 91/2000, real env return = -214.76


  5%|█▉                                    | 101/2000 [29:35<9:51:58, 18.70s/it]

Episode 101/2000, real env return = -55.90


  6%|██                                    | 111/2000 [32:41<9:48:19, 18.69s/it]

Episode 111/2000, real env return = -281.87


  6%|██▎                                   | 121/2000 [35:48<9:42:50, 18.61s/it]

Episode 121/2000, real env return = -336.52


  7%|██▍                                   | 131/2000 [38:53<9:35:35, 18.48s/it]

Episode 131/2000, real env return = -170.62


  7%|██▋                                   | 141/2000 [41:59<9:37:36, 18.64s/it]

Episode 141/2000, real env return = -172.27


  8%|██▊                                   | 151/2000 [45:05<9:30:45, 18.52s/it]

Episode 151/2000, real env return = -258.10


  8%|███                                   | 161/2000 [48:10<9:24:55, 18.43s/it]

Episode 161/2000, real env return = -172.90


  9%|███▏                                  | 171/2000 [51:14<9:20:29, 18.39s/it]

Episode 171/2000, real env return = -237.13


  9%|███▍                                  | 181/2000 [54:19<9:18:01, 18.41s/it]

Episode 181/2000, real env return = -84.28


 10%|███▋                                  | 191/2000 [57:24<9:17:57, 18.51s/it]

Episode 191/2000, real env return = 61.37


 10%|███▌                                | 201/2000 [1:00:27<9:09:37, 18.33s/it]

Episode 201/2000, real env return = -376.76


 11%|███▊                                | 211/2000 [1:03:33<9:10:12, 18.45s/it]

Episode 211/2000, real env return = 946.54


 11%|███▉                                | 221/2000 [1:06:39<9:13:35, 18.67s/it]

Episode 221/2000, real env return = 639.81


 12%|████▏                               | 231/2000 [1:09:44<9:07:17, 18.56s/it]

Episode 231/2000, real env return = 508.65


 12%|████▎                               | 241/2000 [1:12:50<9:03:11, 18.53s/it]

Episode 241/2000, real env return = 635.99


 13%|████▌                               | 251/2000 [1:15:54<8:55:52, 18.38s/it]

Episode 251/2000, real env return = 614.07


 13%|████▋                               | 261/2000 [1:18:58<8:52:46, 18.38s/it]

Episode 261/2000, real env return = 629.08


 14%|████▉                               | 271/2000 [1:21:12<6:17:05, 13.09s/it]

Episode 271/2000, real env return = 592.32


 14%|█████                               | 281/2000 [1:23:21<6:08:23, 12.86s/it]

Episode 281/2000, real env return = 553.58


 15%|█████▏                              | 291/2000 [1:25:29<6:06:06, 12.85s/it]

Episode 291/2000, real env return = 628.51


 15%|█████▍                              | 301/2000 [1:27:38<6:04:31, 12.87s/it]

Episode 301/2000, real env return = 718.24


 16%|█████▌                              | 311/2000 [1:29:47<6:01:55, 12.86s/it]

Episode 311/2000, real env return = 1255.53


 16%|█████▊                              | 321/2000 [1:31:55<6:00:11, 12.87s/it]

Episode 321/2000, real env return = 1585.29


 17%|█████▉                              | 331/2000 [1:34:04<5:58:02, 12.87s/it]

Episode 331/2000, real env return = 1879.74


 17%|██████▏                             | 341/2000 [1:36:13<5:55:54, 12.87s/it]

Episode 341/2000, real env return = 2191.60


 18%|██████▎                             | 351/2000 [1:38:22<5:54:20, 12.89s/it]

Episode 351/2000, real env return = 2600.59


 18%|██████▍                             | 361/2000 [1:40:31<5:52:35, 12.91s/it]

Episode 361/2000, real env return = 2530.78


 19%|██████▋                             | 371/2000 [1:42:40<5:52:27, 12.98s/it]

Episode 371/2000, real env return = 2376.57


 19%|██████▊                             | 381/2000 [1:44:49<5:47:40, 12.88s/it]

Episode 381/2000, real env return = 2504.44


 20%|███████                             | 391/2000 [1:46:58<5:45:48, 12.90s/it]

Episode 391/2000, real env return = 2343.64


 20%|███████▏                            | 401/2000 [1:49:07<5:43:47, 12.90s/it]

Episode 401/2000, real env return = 2491.39


 21%|███████▍                            | 411/2000 [1:51:17<5:40:36, 12.86s/it]

Episode 411/2000, real env return = 2335.76


 21%|███████▌                            | 421/2000 [1:53:26<5:38:48, 12.87s/it]

Episode 421/2000, real env return = 2568.08


 22%|███████▊                            | 431/2000 [1:55:34<5:37:05, 12.89s/it]

Episode 431/2000, real env return = 2169.71


 22%|███████▉                            | 441/2000 [1:57:43<5:35:15, 12.90s/it]

Episode 441/2000, real env return = 2100.42


 23%|████████                            | 451/2000 [1:59:53<5:32:26, 12.88s/it]

Episode 451/2000, real env return = 2041.59


 23%|████████▎                           | 461/2000 [2:02:01<5:31:06, 12.91s/it]

Episode 461/2000, real env return = 2643.94


 24%|████████▍                           | 471/2000 [2:04:10<5:28:12, 12.88s/it]

Episode 471/2000, real env return = 2427.46


 24%|████████▋                           | 481/2000 [2:06:19<5:24:59, 12.84s/it]

Episode 481/2000, real env return = 2708.22


 25%|████████▊                           | 491/2000 [2:08:28<5:25:48, 12.95s/it]

Episode 491/2000, real env return = 2679.53


 25%|█████████                           | 501/2000 [2:10:37<5:21:23, 12.86s/it]

Episode 501/2000, real env return = 2688.64


 26%|█████████▏                          | 511/2000 [2:12:45<5:17:44, 12.80s/it]

Episode 511/2000, real env return = 2386.40


 26%|█████████▍                          | 521/2000 [2:14:55<5:21:53, 13.06s/it]

Episode 521/2000, real env return = 3098.11


 27%|█████████▌                          | 531/2000 [2:17:04<5:15:01, 12.87s/it]

Episode 531/2000, real env return = 2934.04


 27%|█████████▋                          | 541/2000 [2:19:13<5:11:59, 12.83s/it]

Episode 541/2000, real env return = 3160.27


 28%|█████████▉                          | 551/2000 [2:21:21<5:10:20, 12.85s/it]

Episode 551/2000, real env return = 3072.28


 28%|██████████                          | 561/2000 [2:23:30<5:07:03, 12.80s/it]

Episode 561/2000, real env return = 2648.97


 29%|██████████▎                         | 571/2000 [2:25:38<5:05:52, 12.84s/it]

Episode 571/2000, real env return = 2881.05


 29%|██████████▍                         | 581/2000 [2:27:46<5:03:29, 12.83s/it]

Episode 581/2000, real env return = 3103.93


 30%|██████████▋                         | 591/2000 [2:29:55<5:02:24, 12.88s/it]

Episode 591/2000, real env return = 3068.38


 30%|██████████▊                         | 601/2000 [2:32:04<4:59:54, 12.86s/it]

Episode 601/2000, real env return = 3221.28


 31%|██████████▉                         | 611/2000 [2:34:12<4:57:37, 12.86s/it]

Episode 611/2000, real env return = 3130.50


 31%|███████████▏                        | 621/2000 [2:36:20<4:54:23, 12.81s/it]

Episode 621/2000, real env return = 3144.08


 32%|███████████▎                        | 631/2000 [2:38:29<4:53:17, 12.85s/it]

Episode 631/2000, real env return = 3300.17


 32%|███████████▌                        | 641/2000 [2:40:38<4:51:40, 12.88s/it]

Episode 641/2000, real env return = 3381.14


 33%|███████████▋                        | 651/2000 [2:42:47<4:49:09, 12.86s/it]

Episode 651/2000, real env return = 3414.14


 33%|███████████▉                        | 661/2000 [2:44:55<4:47:18, 12.87s/it]

Episode 661/2000, real env return = 3372.37


 34%|████████████                        | 671/2000 [2:47:04<4:46:45, 12.95s/it]

Episode 671/2000, real env return = 3620.08


 34%|████████████▎                       | 681/2000 [2:49:13<4:43:57, 12.92s/it]

Episode 681/2000, real env return = 3287.27


 35%|████████████▍                       | 691/2000 [2:51:22<4:41:20, 12.90s/it]

Episode 691/2000, real env return = 3727.49


 35%|████████████▌                       | 701/2000 [2:53:31<4:39:34, 12.91s/it]

Episode 701/2000, real env return = 3505.36


 36%|████████████▊                       | 711/2000 [2:55:40<4:35:37, 12.83s/it]

Episode 711/2000, real env return = 3620.43


 36%|████████████▉                       | 721/2000 [2:57:49<4:34:35, 12.88s/it]

Episode 721/2000, real env return = 3393.81


 37%|█████████████▏                      | 731/2000 [2:59:58<4:32:51, 12.90s/it]

Episode 731/2000, real env return = 3537.54


 37%|█████████████▎                      | 741/2000 [3:02:07<4:30:28, 12.89s/it]

Episode 741/2000, real env return = 3652.38


 38%|█████████████▌                      | 751/2000 [3:04:16<4:27:58, 12.87s/it]

Episode 751/2000, real env return = 1381.01


 38%|█████████████▋                      | 761/2000 [3:06:25<4:25:53, 12.88s/it]

Episode 761/2000, real env return = 3692.81


 39%|█████████████▉                      | 771/2000 [3:08:33<4:22:55, 12.84s/it]

Episode 771/2000, real env return = 3591.11


 39%|██████████████                      | 781/2000 [3:10:42<4:21:03, 12.85s/it]

Episode 781/2000, real env return = 3447.00


 40%|██████████████▏                     | 791/2000 [3:12:51<4:19:44, 12.89s/it]

Episode 791/2000, real env return = 3811.90


 40%|██████████████▍                     | 801/2000 [3:15:00<4:17:01, 12.86s/it]

Episode 801/2000, real env return = 3841.40


 41%|██████████████▌                     | 811/2000 [3:17:09<4:15:03, 12.87s/it]

Episode 811/2000, real env return = 3734.57


 41%|██████████████▊                     | 821/2000 [3:19:18<4:13:20, 12.89s/it]

Episode 821/2000, real env return = 3617.85


 42%|██████████████▉                     | 831/2000 [3:21:26<4:10:56, 12.88s/it]

Episode 831/2000, real env return = 3894.52


 42%|███████████████▏                    | 841/2000 [3:23:35<4:08:12, 12.85s/it]

Episode 841/2000, real env return = 3574.95


 43%|███████████████▎                    | 851/2000 [3:25:44<4:06:29, 12.87s/it]

Episode 851/2000, real env return = 3898.95


 43%|███████████████▍                    | 861/2000 [3:27:53<4:04:38, 12.89s/it]

Episode 861/2000, real env return = 3616.06


 44%|███████████████▋                    | 871/2000 [3:30:02<4:02:36, 12.89s/it]

Episode 871/2000, real env return = 3767.71


 44%|███████████████▊                    | 881/2000 [3:32:10<4:00:03, 12.87s/it]

Episode 881/2000, real env return = 3630.61


 45%|████████████████                    | 891/2000 [3:34:19<3:57:59, 12.88s/it]

Episode 891/2000, real env return = 4080.46


 45%|████████████████▏                   | 901/2000 [3:36:28<3:54:59, 12.83s/it]

Episode 901/2000, real env return = 4191.34


 46%|████████████████▍                   | 911/2000 [3:38:37<3:53:57, 12.89s/it]

Episode 911/2000, real env return = 3971.96


 46%|████████████████▌                   | 921/2000 [3:40:46<3:51:35, 12.88s/it]

Episode 921/2000, real env return = 3936.76


 47%|████████████████▊                   | 931/2000 [3:42:55<3:49:44, 12.89s/it]

Episode 931/2000, real env return = -67.94


 47%|████████████████▉                   | 941/2000 [3:45:03<3:46:10, 12.81s/it]

Episode 941/2000, real env return = -578.51


 48%|█████████████████                   | 951/2000 [3:47:12<3:45:14, 12.88s/it]

Episode 951/2000, real env return = 597.76


 48%|█████████████████▎                  | 961/2000 [3:49:21<3:42:58, 12.88s/it]

Episode 961/2000, real env return = 3675.49


 49%|█████████████████▍                  | 971/2000 [3:51:30<3:40:44, 12.87s/it]

Episode 971/2000, real env return = 3898.85


 49%|█████████████████▋                  | 981/2000 [3:53:38<3:37:59, 12.84s/it]

Episode 981/2000, real env return = 3796.75


 50%|█████████████████▊                  | 991/2000 [3:55:48<3:37:02, 12.91s/it]

Episode 991/2000, real env return = 3870.30


 50%|█████████████████▌                 | 1001/2000 [3:57:57<3:34:33, 12.89s/it]

Episode 1001/2000, real env return = 4037.66


 51%|█████████████████▋                 | 1011/2000 [4:00:06<3:32:10, 12.87s/it]

Episode 1011/2000, real env return = 4285.91


 51%|█████████████████▊                 | 1021/2000 [4:02:14<3:29:34, 12.84s/it]

Episode 1021/2000, real env return = 3951.08


 52%|██████████████████                 | 1031/2000 [4:04:23<3:27:39, 12.86s/it]

Episode 1031/2000, real env return = 3918.84


 52%|██████████████████▏                | 1041/2000 [4:06:31<3:25:21, 12.85s/it]

Episode 1041/2000, real env return = 3774.33


 53%|██████████████████▍                | 1051/2000 [4:08:40<3:23:30, 12.87s/it]

Episode 1051/2000, real env return = 3667.16


 53%|██████████████████▌                | 1061/2000 [4:10:49<3:21:10, 12.85s/it]

Episode 1061/2000, real env return = 3802.65


 54%|██████████████████▋                | 1071/2000 [4:12:57<3:18:51, 12.84s/it]

Episode 1071/2000, real env return = 3845.01


 54%|██████████████████▉                | 1081/2000 [4:15:08<3:21:24, 13.15s/it]

Episode 1081/2000, real env return = 3515.56


 55%|███████████████████                | 1091/2000 [4:17:43<4:28:36, 17.73s/it]

Episode 1091/2000, real env return = 3918.31


 55%|███████████████████▎               | 1101/2000 [4:20:52<4:42:27, 18.85s/it]

Episode 1101/2000, real env return = 3566.92


 56%|███████████████████▍               | 1111/2000 [4:23:49<3:57:41, 16.04s/it]

Episode 1111/2000, real env return = 4040.98


 56%|███████████████████▌               | 1121/2000 [4:26:24<4:15:48, 17.46s/it]

Episode 1121/2000, real env return = 4062.54


 57%|███████████████████▊               | 1131/2000 [4:28:53<3:57:58, 16.43s/it]

Episode 1131/2000, real env return = 4212.91


 57%|███████████████████▉               | 1141/2000 [4:31:48<4:12:36, 17.64s/it]

Episode 1141/2000, real env return = 3836.65


 58%|████████████████████▏              | 1151/2000 [4:34:47<4:18:14, 18.25s/it]

Episode 1151/2000, real env return = 4088.25


 58%|████████████████████▎              | 1161/2000 [4:37:51<4:17:23, 18.41s/it]

Episode 1161/2000, real env return = 4109.92


 59%|████████████████████▍              | 1171/2000 [4:40:53<4:11:39, 18.21s/it]

Episode 1171/2000, real env return = 4171.64


 59%|████████████████████▋              | 1181/2000 [4:43:56<4:10:52, 18.38s/it]

Episode 1181/2000, real env return = 4159.51


 60%|████████████████████▊              | 1191/2000 [4:47:00<4:08:11, 18.41s/it]

Episode 1191/2000, real env return = 4092.84


 60%|█████████████████████              | 1201/2000 [4:50:03<4:03:33, 18.29s/it]

Episode 1201/2000, real env return = 4041.89


 61%|█████████████████████▏             | 1211/2000 [4:53:08<4:03:36, 18.53s/it]

Episode 1211/2000, real env return = 4238.85


 61%|█████████████████████▎             | 1221/2000 [4:56:12<3:58:04, 18.34s/it]

Episode 1221/2000, real env return = 4219.15


 62%|█████████████████████▌             | 1231/2000 [4:59:16<3:55:19, 18.36s/it]

Episode 1231/2000, real env return = 4205.43


 62%|█████████████████████▋             | 1241/2000 [5:02:18<3:50:16, 18.20s/it]

Episode 1241/2000, real env return = 3940.15


 63%|█████████████████████▉             | 1251/2000 [5:05:22<3:48:13, 18.28s/it]

Episode 1251/2000, real env return = 4214.55


 63%|██████████████████████             | 1261/2000 [5:08:25<3:45:30, 18.31s/it]

Episode 1261/2000, real env return = 3849.85


 64%|██████████████████████▏            | 1271/2000 [5:11:29<3:43:32, 18.40s/it]

Episode 1271/2000, real env return = 3971.65


 64%|██████████████████████▍            | 1281/2000 [5:14:33<3:40:46, 18.42s/it]

Episode 1281/2000, real env return = 4076.18


 65%|██████████████████████▌            | 1291/2000 [5:17:36<3:37:00, 18.36s/it]

Episode 1291/2000, real env return = 4176.05


 65%|██████████████████████▊            | 1301/2000 [5:20:39<3:32:13, 18.22s/it]

Episode 1301/2000, real env return = 4144.21


 66%|██████████████████████▉            | 1311/2000 [5:23:42<3:30:59, 18.37s/it]

Episode 1311/2000, real env return = 4155.76


 66%|███████████████████████            | 1321/2000 [5:26:46<3:29:49, 18.54s/it]

Episode 1321/2000, real env return = 3887.61


 67%|███████████████████████▎           | 1331/2000 [5:29:52<3:26:51, 18.55s/it]

Episode 1331/2000, real env return = 4406.05


 67%|███████████████████████▍           | 1341/2000 [5:32:59<3:24:53, 18.66s/it]

Episode 1341/2000, real env return = 4124.27


 68%|███████████████████████▋           | 1351/2000 [5:36:06<3:22:36, 18.73s/it]

Episode 1351/2000, real env return = 4436.20


 68%|███████████████████████▊           | 1361/2000 [5:39:13<3:18:40, 18.66s/it]

Episode 1361/2000, real env return = 3945.61


 69%|███████████████████████▉           | 1371/2000 [5:42:20<3:15:38, 18.66s/it]

Episode 1371/2000, real env return = 4265.32


 69%|████████████████████████▏          | 1381/2000 [5:45:27<3:13:36, 18.77s/it]

Episode 1381/2000, real env return = 4412.01


 70%|████████████████████████▎          | 1391/2000 [5:48:34<3:09:41, 18.69s/it]

Episode 1391/2000, real env return = 4078.85


 70%|████████████████████████▌          | 1401/2000 [5:51:42<3:08:40, 18.90s/it]

Episode 1401/2000, real env return = 4049.04


 71%|████████████████████████▋          | 1411/2000 [5:54:49<3:04:31, 18.80s/it]

Episode 1411/2000, real env return = 4184.60


 71%|████████████████████████▊          | 1421/2000 [5:57:55<2:59:58, 18.65s/it]

Episode 1421/2000, real env return = 3854.29


 72%|█████████████████████████          | 1431/2000 [6:01:02<2:57:16, 18.69s/it]

Episode 1431/2000, real env return = 4114.32


 72%|█████████████████████████▏         | 1441/2000 [6:04:08<2:53:51, 18.66s/it]

Episode 1441/2000, real env return = 4297.89


 73%|█████████████████████████▍         | 1451/2000 [6:07:16<2:51:08, 18.70s/it]

Episode 1451/2000, real env return = 4171.75


 73%|█████████████████████████▌         | 1461/2000 [6:10:23<2:47:28, 18.64s/it]

Episode 1461/2000, real env return = 4290.99


 74%|█████████████████████████▋         | 1471/2000 [6:13:30<2:45:17, 18.75s/it]

Episode 1471/2000, real env return = 4358.75


 74%|█████████████████████████▉         | 1481/2000 [6:16:36<2:41:51, 18.71s/it]

Episode 1481/2000, real env return = 3897.97


 75%|██████████████████████████         | 1491/2000 [6:19:43<2:37:57, 18.62s/it]

Episode 1491/2000, real env return = 4248.53


 75%|██████████████████████████▎        | 1501/2000 [6:22:49<2:34:25, 18.57s/it]

Episode 1501/2000, real env return = 4387.66


 76%|██████████████████████████▍        | 1511/2000 [6:25:54<2:30:49, 18.51s/it]

Episode 1511/2000, real env return = 4185.53


 76%|██████████████████████████▌        | 1521/2000 [6:29:01<2:28:31, 18.60s/it]

Episode 1521/2000, real env return = 4285.26


 77%|██████████████████████████▊        | 1531/2000 [6:32:04<2:23:03, 18.30s/it]

Episode 1531/2000, real env return = 4365.73


 77%|██████████████████████████▉        | 1541/2000 [6:35:10<2:21:31, 18.50s/it]

Episode 1541/2000, real env return = 4322.48


 78%|███████████████████████████▏       | 1551/2000 [6:38:12<2:15:40, 18.13s/it]

Episode 1551/2000, real env return = 4390.84


 78%|███████████████████████████▎       | 1561/2000 [6:41:13<2:12:54, 18.16s/it]

Episode 1561/2000, real env return = 3931.80


 79%|███████████████████████████▍       | 1571/2000 [6:44:14<2:09:24, 18.10s/it]

Episode 1571/2000, real env return = 4505.98


 79%|███████████████████████████▋       | 1581/2000 [6:47:16<2:06:12, 18.07s/it]

Episode 1581/2000, real env return = 4360.68


 80%|███████████████████████████▊       | 1591/2000 [6:50:18<2:04:12, 18.22s/it]

Episode 1591/2000, real env return = 4180.14


 80%|████████████████████████████       | 1601/2000 [6:53:20<2:00:53, 18.18s/it]

Episode 1601/2000, real env return = 4544.89


 81%|████████████████████████████▏      | 1611/2000 [6:56:22<1:57:52, 18.18s/it]

Episode 1611/2000, real env return = 4504.31


 81%|████████████████████████████▎      | 1621/2000 [6:59:24<1:55:15, 18.25s/it]

Episode 1621/2000, real env return = 4382.36


 82%|████████████████████████████▌      | 1631/2000 [7:02:27<1:52:28, 18.29s/it]

Episode 1631/2000, real env return = 4755.60


 82%|████████████████████████████▋      | 1641/2000 [7:04:53<1:16:19, 12.76s/it]

Episode 1641/2000, real env return = 4350.57


 83%|████████████████████████████▉      | 1651/2000 [7:06:51<1:08:25, 11.76s/it]

Episode 1651/2000, real env return = 4341.24


 83%|█████████████████████████████      | 1661/2000 [7:08:50<1:07:01, 11.86s/it]

Episode 1661/2000, real env return = 4347.46


 84%|█████████████████████████████▏     | 1671/2000 [7:10:47<1:03:48, 11.64s/it]

Episode 1671/2000, real env return = 4634.35


 84%|█████████████████████████████▍     | 1681/2000 [7:12:45<1:02:51, 11.82s/it]

Episode 1681/2000, real env return = 4361.94


 85%|█████████████████████████████▌     | 1691/2000 [7:14:43<1:00:33, 11.76s/it]

Episode 1691/2000, real env return = 4340.75


 85%|███████████████████████████████▍     | 1701/2000 [7:16:41<58:58, 11.84s/it]

Episode 1701/2000, real env return = 4292.18


 86%|███████████████████████████████▋     | 1711/2000 [7:18:39<56:50, 11.80s/it]

Episode 1711/2000, real env return = 4386.02


 86%|███████████████████████████████▊     | 1721/2000 [7:20:36<54:22, 11.69s/it]

Episode 1721/2000, real env return = 4749.42


 87%|████████████████████████████████     | 1731/2000 [7:22:34<53:01, 11.83s/it]

Episode 1731/2000, real env return = 4344.80


 87%|████████████████████████████████▏    | 1741/2000 [7:24:33<50:54, 11.79s/it]

Episode 1741/2000, real env return = 4354.56


 88%|████████████████████████████████▍    | 1751/2000 [7:26:31<48:28, 11.68s/it]

Episode 1751/2000, real env return = 4404.40


 88%|████████████████████████████████▌    | 1761/2000 [7:28:28<46:22, 11.64s/it]

Episode 1761/2000, real env return = 4523.26


 89%|████████████████████████████████▊    | 1771/2000 [7:30:25<44:30, 11.66s/it]

Episode 1771/2000, real env return = -508.07


 89%|████████████████████████████████▉    | 1781/2000 [7:32:22<42:45, 11.72s/it]

Episode 1781/2000, real env return = 4397.57


 90%|█████████████████████████████████▏   | 1791/2000 [7:34:20<40:25, 11.61s/it]

Episode 1791/2000, real env return = 4175.47


 90%|█████████████████████████████████▎   | 1801/2000 [7:36:18<39:00, 11.76s/it]

Episode 1801/2000, real env return = 4668.40


 91%|█████████████████████████████████▌   | 1811/2000 [7:38:16<36:56, 11.73s/it]

Episode 1811/2000, real env return = 4217.08


 91%|█████████████████████████████████▋   | 1821/2000 [7:40:16<35:35, 11.93s/it]

Episode 1821/2000, real env return = 4426.37


 92%|█████████████████████████████████▊   | 1831/2000 [7:42:15<33:37, 11.94s/it]

Episode 1831/2000, real env return = 4429.22


 92%|██████████████████████████████████   | 1841/2000 [7:44:13<31:26, 11.87s/it]

Episode 1841/2000, real env return = 4284.21


 93%|██████████████████████████████████▏  | 1851/2000 [7:46:11<29:14, 11.78s/it]

Episode 1851/2000, real env return = 4245.58


 93%|██████████████████████████████████▍  | 1861/2000 [7:48:11<27:50, 12.02s/it]

Episode 1861/2000, real env return = 4475.76


 94%|██████████████████████████████████▌  | 1871/2000 [7:50:11<25:33, 11.89s/it]

Episode 1871/2000, real env return = 4277.94


 94%|██████████████████████████████████▊  | 1881/2000 [7:52:09<23:30, 11.85s/it]

Episode 1881/2000, real env return = 4316.24


 95%|██████████████████████████████████▉  | 1891/2000 [7:54:07<21:39, 11.92s/it]

Episode 1891/2000, real env return = 4404.27


 95%|███████████████████████████████████▏ | 1901/2000 [7:56:08<19:43, 11.96s/it]

Episode 1901/2000, real env return = 4083.94


 96%|███████████████████████████████████▎ | 1911/2000 [7:58:08<17:42, 11.94s/it]

Episode 1911/2000, real env return = 4441.71


 96%|███████████████████████████████████▌ | 1921/2000 [8:00:09<15:42, 11.93s/it]

Episode 1921/2000, real env return = 4508.47


 97%|███████████████████████████████████▋ | 1931/2000 [8:02:09<14:01, 12.19s/it]

Episode 1931/2000, real env return = 4553.33


 97%|███████████████████████████████████▉ | 1941/2000 [8:04:10<11:43, 11.93s/it]

Episode 1941/2000, real env return = 4042.11


 98%|████████████████████████████████████ | 1951/2000 [8:06:09<09:47, 11.99s/it]

Episode 1951/2000, real env return = 4649.30


 98%|████████████████████████████████████▎| 1961/2000 [8:08:08<07:41, 11.82s/it]

Episode 1961/2000, real env return = 4410.56


 99%|████████████████████████████████████▍| 1971/2000 [8:10:08<05:47, 11.98s/it]

Episode 1971/2000, real env return = 4364.22


 99%|████████████████████████████████████▋| 1981/2000 [8:12:05<03:44, 11.79s/it]

Episode 1981/2000, real env return = 4110.41


100%|████████████████████████████████████▊| 1991/2000 [8:14:07<01:49, 12.15s/it]

Episode 1991/2000, real env return = 4638.06


100%|█████████████████████████████████████| 2000/2000 [8:15:55<00:00, 14.88s/it]


Training finished.
