# Import Site-Package

In [1]:
import random
import time,math
import numpy as np
import torch.nn as nn
import gymnasium as gym
import gymnasium.wrappers as gym_wrap
import matplotlib.pyplot as plt
import matplotlib.animation as animation #輸出動畫影片
from IPython import display
from tqdm import tqdm
import cv2

In [2]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
import collections
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Replay Buffer

In [3]:
class ReplayBuffer:
  def __init__(self,max_size=int(1e5), num_steps=1):
    self.s = np.zeros((max_size,4,72,72), dtype=np.float32)
    self.a = np.zeros((max_size,), dtype=np.int64)
    self.r = np.zeros((max_size, 1), dtype=np.float32)
    self.s_ = np.zeros((max_size,4,72,72), dtype=np.float32)
    self.done = np.zeros((max_size, 1), dtype=np.float32)
    self.info = np.zeros((max_size, 7), dtype=np.float32)
    self.ptr = 0
    self.size = 0
    self.max_size = max_size
    self.num_steps = num_steps

  def append(self,s,a,r,s_,done,info):
    self.s[self.ptr] = s
    self.a[self.ptr] = a
    self.r[self.ptr] = r
    self.s_[self.ptr] = s_
    self.done[self.ptr] = done
    self.info[self.ptr] = info
    self.ptr = (self.ptr + 1) % self.max_size
    self.size = min(self.size+1,self.max_size)

  def sample(self, batch_size):
    ind = np.random.randint(0, self.size, batch_size)
    s = torch.FloatTensor(self.s[ind]).to(device)
    s_ = torch.FloatTensor(self.s_[ind]).to(device)
    return s, torch.LongTensor(self.a[ind]).to(device), \
           torch.FloatTensor(self.r[ind]).to(device), \
           torch.FloatTensor(self.s_[ind]).to(device), \
           torch.FloatTensor(self.done[ind]).to(device), \
           torch.FloatTensor(self.info[ind]).to(device)

# VAE

### Encoder

In [4]:
class VAEEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, latent_size*2)

    def forward(self, x, latent_size = 50):
        x = self.fc2( torch.relu(self.fc1(x)) )
        mean, log_var = x.split(latent_size, dim=1)
        return mean, log_var

### Decoder

In [5]:
class VAEDecoder(nn.Module):
    def __init__(self, latent_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(latent_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.sigmoid( self.fc2( torch.relu(self.fc1(x)) ) )
        return x

### VAE Module

In [6]:
class VAE(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size):
        super().__init__()
        self.encoder = VAEEncoder(input_size, hidden_size, latent_size)
        self.decoder = VAEDecoder(latent_size, hidden_size, input_size)

    def forward(self, x):
        mean, log_var = self.encoder(x)
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std).to(device)
        z = mean + std * eps    #random sampling
        reconstruct = self.decoder(z)
        return reconstruct, mean, log_var

### Loss Function

In [7]:
def VAE_Loss(reconstruct, x, mean, log_var):
    reconstruct_loss = nn.MSELoss(reduction='sum')(reconstruct, x)
    kl_loss = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    return reconstruct_loss + kl_loss

# Initialize

### load model

In [8]:
Load_File=0
Old_File=f"Model-{Load_File}.pt"
if Load_File>0:
  Log= np.load(f"Log-{Load_File}.npy", allow_pickle=True).item()
else:
  Log={"TrainReward":[],"TestReward":[],"Loss":[]}

In [9]:
VAE_model = VAE(input_size=5184, hidden_size=800, latent_size=50)
VAE_model.load_state_dict( torch.load("model_2500.pth") )
VAE_model.eval()

VAE(
  (encoder): VAEEncoder(
    (fc1): Linear(in_features=5184, out_features=800, bias=True)
    (fc2): Linear(in_features=800, out_features=100, bias=True)
  )
  (decoder): VAEDecoder(
    (fc1): Linear(in_features=50, out_features=800, bias=True)
    (fc2): Linear(in_features=800, out_features=5184, bias=True)
  )
)

### env

In [10]:
class ImageEnv(gym.Wrapper):
  def __init__(self, env, stack_frames=4, delay_op=50):
    super(ImageEnv, self).__init__(env)
    self.delay_op = delay_op
    self.stack_frames = stack_frames
  def reset(self):
    s, info = self.env.reset()
    for i in range(self.delay_op):
      s, r, terminated, truncated, info = self.env.step(0)
      s = cv2.cvtColor(s, cv2.COLOR_RGB2GRAY)
      s=s[:72, 12:84]/255.0 - 0.5
      self.stacked_state = np.tile( s , (self.stack_frames,1,1) )  # [4, 72, 72]
    return self.stacked_state, info

  def step(self, action):
    reward = 0
    for _ in range(self.stack_frames):
      s, r, terminated, truncated, info = self.env.step(action)
      if r==-100:terminated=True
      s = cv2.cvtColor(s, cv2.COLOR_RGB2GRAY)
      s=s[:72, 12:84]/255.0 - 0.5
      reward += r
      if terminated or truncated:break
      self.stacked_state = np.concatenate((self.stacked_state[1:], s[np.newaxis]), axis=0)
    return self.stacked_state, reward, terminated, truncated, info

In [11]:
env=gym.make('CarRacing-v3',render_mode="rgb_array",domain_randomize=False, continuous=False)
env = ImageEnv(env)

# DQN

In [12]:
class DQN(torch.nn.Module):
  def __init__(self, vae, n_act, latent_size = 50):
    super(DQN,self).__init__()
    self.vae = vae
    self.fc1 = torch.nn.Linear(latent_size+7, 256)
    self.fc2 = torch.nn.Linear(256, n_act)
  def forward(self, x, info, input_size = 5184):
    with torch.no_grad():
        mean, _ = self.vae.encoder(x[:, -1, :, :].view(x.shape[0], -1))
        info = torch.FloatTensor(info).reshape(7,-1)
        mean = torch.FloatTensor(mean).reshape(50,-1)
        x = torch.vstack((mean,info)).T
    x = torch.relu( self.fc1(x) )
    x = self.fc2(x)
    return x

# 搭建智能體Agent的類別

In [13]:
class DQNAgent():
  def __init__(self,gamma=0.9,eps_low=0.1,lr=0.00025):
    self.env = env
    self.n_act=self.env.action_space.n
    self.PredictDQN= DQN(VAE_model, self.n_act)
    self.TargetDQN= DQN(VAE_model, self.n_act)
    if Load_File>0:
      self.PredictDQN.load_state_dict(torch.load(Old_File))
      self.TargetDQN.load_state_dict(torch.load(Old_File))
    self.PredictDQN.to(device)
    self.TargetDQN.to(device)
    self.LossFun=torch.nn.SmoothL1Loss()
    self.optimizer=torch.optim.Adam(self.PredictDQN.parameters(),lr=lr)
    self.gamma=gamma
    self.eps_low=eps_low
    self.rb=ReplayBuffer(max_size=10000, num_steps=1)
  def change_info_type(self,_):
      with torch.no_grad():
          teml = [_["true_speed"],_["four_ABS_sensors"][0],_["four_ABS_sensors"][1],_["four_ABS_sensors"][2],_["four_ABS_sensors"][3],_["steering_wheel_position"],_["gyroscop"]]
          for i in range(7): teml[i] = np.float64(teml[i])
          info = np.array(teml)
          return info
  def PredictA(self,s,info):
    if s.ndim == 3:
        s = np.expand_dims(s, axis=0)  # 變成 (1, 4, 72, 72)
    with torch.no_grad():
        s_tensor = torch.FloatTensor(s).to(device)
        info_tensor = torch.FloatTensor(info).to(device)
        q_values = self.PredictDQN(s_tensor,info_tensor)
        return torch.argmax(q_values).item()
  def SelectA(self,a):
    return self.env.action_space.sample() if np.random.random()<self.EPS else a
  def Train(self,N_EPISODES):
    for i in tqdm(range(Load_File,N_EPISODES)):
      self.EPS=self.eps_low+(1-self.eps_low)*math.exp(-i*12/(N_EPISODES))
      total_reward=0
      s,info=self.env.reset()
      info = self.change_info_type(info)        
      while True:
        a=self.SelectA(self.PredictA(s,info))
        s_,r,done,stop,info=self.env.step(a)
        if done or stop: info = np.zeros(7,dtype=np.float64)
        else: info  = self.change_info_type(info)
        self.rb.append(s,a,r,s_,done,info)
        if self.rb.size > 200 and i%self.rb.num_steps==0:self.Learn()
        if i % 20==0:  self.TargetDQN.load_state_dict(self.PredictDQN.state_dict())
        s=s_
        total_reward+=r
        if done or stop:break
      # print(f"\n{total_reward}")
      Log["TrainReward"].append(total_reward)
      if i % 10 == 9:
        test_reward=self.Test()
        print(f"\n訓練次數{i+1}，總回報{test_reward}")
        Log["TestReward"].append(test_reward)
        torch.save(self.PredictDQN.state_dict(), f"Model-{i+1}.pt")
        np.save(f"Log-{i+1}.npy", Log)
  def Learn(self):
    self.optimizer.zero_grad()
    batch_s, batch_a, batch_r, batch_s_, batch_done, batch_info=self.rb.sample(32)
    predict_Q = (self.PredictDQN(batch_s.to(device),batch_info.to(device))*F.one_hot(batch_a.long().to(device),self.n_act)).sum(1,keepdims=True)
    with torch.no_grad():
      target_Q = batch_r.to(device)+(1-batch_done.to(device))*self.gamma*self.TargetDQN(batch_s_.to(device),batch_info.to(device)).max(1,keepdims=True)[0]
    loss = self.LossFun(predict_Q, target_Q)
    Log["Loss"].append(float(loss))
    loss.backward()
    self.optimizer.step()
  def Test(self,VIDEO=False):
    total_reward=0
    video=[]
    s,info=self.env.reset()
    info  = self.change_info_type(info)
    while True:
      video.append(self.env.render())
      a=self.PredictA(s,info)
      s,r,done,stop,info=self.env.step(a)
      if done or stop: info = np.zeros(7,dtype=np.float64)
      else: info  = self.change_info_type(info)
      total_reward+=r
      if done or stop:break
    if VIDEO:
      patch = plt.imshow(video[0]) #產生展示圖形物件
      plt.axis('off') #關閉坐標軸
      def animate(i): #設定更換影格的函數
        patch.set_data(video[i])
        #plt.gcf()=>建新繪圖區 animate=>更換影格函數 frames=>影格數 interval=>影隔間距(毫秒)
      anim = animation.FuncAnimation(plt.gcf(),animate,frames=len(video),interval=200)
      anim.save('Car_Racing.mp4') #儲存為mp4擋
    return total_reward
  def Record(self):
    total_reward=0
    s,_=self.env.reset()
    while True:
      image=self.env.render()
      plt.imshow(image)
      #plt.imsave(f"/content/drive/MyDrive/recording/{str(int(time.time()))}.png", image)
      a=self.PredictA(s)
      s,r,done,stop,_=self.env.step(a)
      print(r)
      total_reward+=r
      plt.pause(0.1)
      #清除目前的顯示
      display.clear_output(wait=True)
      if done or stop:break
    print(total_reward)

In [14]:
Agent=DQNAgent(gamma=0.95,eps_low=0.05,lr=0.00025)
Agent.Train(N_EPISODES=5000)

  0%|                                      | 10/5000 [02:23<18:51:47, 13.61s/it]


訓練次數10，總回報-2.9740614334471034


  0%|▏                                     | 20/5000 [04:48<20:11:38, 14.60s/it]


訓練次數20，總回報6.640579710145111


  1%|▏                                     | 30/5000 [06:31<14:01:57, 10.16s/it]


訓練次數30，總回報3.5039215686274607


  1%|▎                                     | 40/5000 [08:49<19:14:34, 13.97s/it]


訓練次數40，總回報4.067114093959741


  1%|▍                                     | 50/5000 [10:26<13:23:54,  9.74s/it]


訓練次數50，總回報4.676156583629906


  1%|▍                                     | 60/5000 [12:32<18:05:20, 13.18s/it]


訓練次數60，總回報28.40268456375822


  1%|▌                                     | 70/5000 [13:57<13:46:09, 10.05s/it]


訓練次數70，總回報3.4860139860139885


  2%|▌                                     | 80/5000 [15:30<12:35:05,  9.21s/it]


訓練次數80，總回報-2.857746478873243


  2%|▋                                     | 90/5000 [17:14<15:31:10, 11.38s/it]


訓練次數90，總回報-2.3650557620817843


  2%|▋                                    | 100/5000 [18:50<12:32:47,  9.22s/it]


訓練次數100，總回報0.017543859649126414


  2%|▊                                    | 110/5000 [20:32<15:21:53, 11.31s/it]


訓練次數110，總回報-2.3825622775800746


  2%|▉                                    | 120/5000 [21:40<10:44:10,  7.92s/it]


訓練次數120，總回報-2.5199261992619926


  3%|▉                                    | 130/5000 [23:03<12:54:50,  9.55s/it]


訓練次數130，總回報-17.86081504702191


  3%|█                                    | 140/5000 [24:24<14:53:54, 11.04s/it]


訓練次數140，總回報-2.3528301886792433


  3%|█▏                                    | 150/5000 [25:35<8:25:23,  6.25s/it]


訓練次數150，總回報22.443205574912852


  3%|█▏                                   | 160/5000 [26:56<11:31:12,  8.57s/it]


訓練次數160，總回報-2.5634069400631025


  3%|█▎                                   | 170/5000 [28:24<13:49:23, 10.30s/it]


訓練次數170，總回報84.87777777777782


  4%|█▎                                    | 180/5000 [29:22<8:55:11,  6.66s/it]


訓練次數180，總回報-1.9817034700315512


  4%|█▍                                   | 190/5000 [30:29<11:49:38,  8.85s/it]


訓練次數190，總回報-2.955555555555561


  4%|█▍                                   | 200/5000 [31:58<17:25:07, 13.06s/it]


訓練次數200，總回報-20.082352941176445


  4%|█▌                                   | 210/5000 [33:39<19:53:21, 14.95s/it]


訓練次數210，總回報-94.99999999999896


  4%|█▋                                   | 220/5000 [35:17<18:47:51, 14.16s/it]


訓練次數220，總回報-94.99999999999898


  5%|█▋                                   | 230/5000 [36:31<10:36:00,  8.00s/it]


訓練次數230，總回報-2.32340425531915


  5%|█▊                                   | 240/5000 [37:49<15:49:46, 11.97s/it]


訓練次數240，總回報53.27906976744128


  5%|█▊                                   | 250/5000 [39:15<10:34:53,  8.02s/it]


訓練次數250，總回報-3.8555555555555605


  5%|█▉                                   | 260/5000 [40:25<13:40:09, 10.38s/it]


訓練次數260，總回報4.959154929577606


  5%|█▉                                   | 270/5000 [41:39<10:46:03,  8.20s/it]


訓練次數270，總回報3.1463022508038634


  6%|██▏                                   | 280/5000 [42:45<7:34:23,  5.78s/it]


訓練次數280，總回報-3.7432432432432483


  6%|██▏                                  | 290/5000 [44:20<17:49:02, 13.62s/it]


訓練次數290，總回報-1.609968847352055


  6%|██▎                                   | 300/5000 [45:39<9:40:23,  7.41s/it]


訓練次數300，總回報-2.955555555555561


  6%|██▎                                  | 310/5000 [46:53<13:31:27, 10.38s/it]


訓練次數310，總回報8.40000000000002


  6%|██▎                                  | 320/5000 [48:01<11:18:04,  8.69s/it]


訓練次數320，總回報-2.266412213740455


  7%|██▍                                  | 330/5000 [49:42<20:48:45, 16.04s/it]


訓練次數330，總回報-94.99999999999896


  7%|██▌                                  | 340/5000 [51:08<11:52:33,  9.17s/it]


訓練次數340，總回報-3.7528735632183947


  7%|██▋                                   | 350/5000 [52:01<7:24:52,  5.74s/it]


訓練次數350，總回報-1.8948616600790449


  7%|██▋                                   | 360/5000 [52:54<7:05:50,  5.51s/it]


訓練次數360，總回報-2.6007299270073005


  7%|██▊                                   | 370/5000 [53:49<8:38:57,  6.73s/it]


訓練次數370，總回報-3.1993399339934037


  8%|██▉                                   | 380/5000 [54:37<7:30:45,  5.85s/it]


訓練次數380，總回報-0.7777777777777346


  8%|██▉                                  | 390/5000 [55:38<13:12:49, 10.32s/it]


訓練次數390，總回報-94.99999999999898


  8%|██▉                                  | 400/5000 [56:34<10:48:11,  8.45s/it]


訓練次數400，總回報6.493430656934487


  8%|███                                   | 410/5000 [57:21<5:43:48,  4.49s/it]


訓練次數410，總回報11.393594306049833


  8%|███▏                                  | 420/5000 [58:08<9:49:21,  7.72s/it]


訓練次數420，總回報2.2067796610170882


  9%|███▎                                  | 430/5000 [58:46<5:15:08,  4.14s/it]


訓練次數430，總回報-3.564052287581705


  9%|███▎                                  | 440/5000 [59:30<7:05:14,  5.60s/it]


訓練次數440，總回報-4.147619047619051


  9%|███▏                                | 450/5000 [1:00:23<6:26:50,  5.10s/it]


訓練次數450，總回報-3.133333333333338


  9%|███▎                                | 460/5000 [1:01:06<5:33:32,  4.41s/it]


訓練次數460，總回報-3.020338983050853


  9%|███▍                                | 470/5000 [1:01:51<5:38:59,  4.49s/it]


訓練次數470，總回報-3.4110367892976643


 10%|███▍                                | 480/5000 [1:02:45<9:39:34,  7.69s/it]


訓練次數480，總回報-3.631541218637995


 10%|███▌                                | 490/5000 [1:03:35<5:37:58,  4.50s/it]


訓練次數490，總回報18.963777089783285


 10%|███▌                                | 500/5000 [1:04:23<7:13:42,  5.78s/it]


訓練次數500，總回報-2.92714776632303


 10%|███▌                               | 510/5000 [1:05:28<13:49:16, 11.08s/it]


訓練次數510，總回報23.773946360152767


 10%|███▋                                | 520/5000 [1:06:13<6:14:11,  5.01s/it]


訓練次數520，總回報50.65838926174485


 11%|███▊                                | 530/5000 [1:07:12<8:25:40,  6.79s/it]


訓練次數530，總回報49.48867313915849


 11%|███▉                                | 540/5000 [1:08:11<8:30:10,  6.86s/it]


訓練次數540，總回報52.625850340135976


 11%|███▉                                | 550/5000 [1:08:52<5:40:24,  4.59s/it]


訓練次數550，總回報47.936908517350105


 11%|████                                | 560/5000 [1:09:34<5:36:08,  4.54s/it]


訓練次數560，總回報51.75838926174487


 11%|████                                | 570/5000 [1:10:29<6:41:16,  5.43s/it]


訓練次數570，總回報126.57940199335573


 12%|████▏                               | 580/5000 [1:11:18<7:06:25,  5.79s/it]


訓練次數580，總回報43.70209059233428


 12%|████▏                               | 590/5000 [1:12:06<7:29:34,  6.12s/it]


訓練次數590，總回報56.34532374100712


 12%|████▎                               | 600/5000 [1:12:56<6:52:26,  5.62s/it]


訓練次數600，總回報46.699999999999854


 12%|████▍                               | 610/5000 [1:13:39<5:52:24,  4.82s/it]


訓練次數610，總回報17.899999999999995


 12%|████▍                               | 620/5000 [1:14:20<5:50:46,  4.81s/it]


訓練次數620，總回報53.97222222222212


 13%|████▌                               | 630/5000 [1:15:20<8:44:25,  7.20s/it]


訓練次數630，總回報45.750759878419366


 13%|████▌                               | 640/5000 [1:15:59<5:40:57,  4.69s/it]


訓練次數640，總回報56.10035842293898


 13%|████▋                               | 650/5000 [1:16:42<5:57:28,  4.93s/it]


訓練次數650，總回報50.29508196721306


 13%|████▊                               | 660/5000 [1:17:25<5:24:28,  4.49s/it]


訓練次數660，總回報49.688311688311586


 13%|████▊                               | 670/5000 [1:18:06<6:25:42,  5.34s/it]


訓練次數670，總回報58.37037037037027


 14%|████▉                               | 680/5000 [1:18:45<5:33:17,  4.63s/it]


訓練次數680，總回報50.49999999999993


 14%|████▉                               | 690/5000 [1:19:20<4:37:22,  3.86s/it]


訓練次數690，總回報46.092307692307614


 14%|█████                               | 700/5000 [1:19:57<4:09:29,  3.48s/it]


訓練次數700，總回報4.614285714285727


 14%|█████                               | 710/5000 [1:20:39<5:23:56,  4.53s/it]


訓練次數710，總回報48.89743589743581


 14%|█████▏                              | 720/5000 [1:21:17<4:55:02,  4.14s/it]


訓練次數720，總回報2.7495575221238933


 15%|█████▎                              | 730/5000 [1:22:02<7:36:34,  6.42s/it]


訓練次數730，總回報27.515658362989054


 15%|█████▎                              | 740/5000 [1:22:35<4:18:11,  3.64s/it]


訓練次數740，總回報12.826591760299639


 15%|█████▍                              | 750/5000 [1:23:18<5:11:45,  4.40s/it]


訓練次數750，總回報4.926315789473696


 15%|█████▍                              | 760/5000 [1:23:59<5:18:20,  4.50s/it]


訓練次數760，總回報41.03157894736836


 15%|█████▌                              | 770/5000 [1:24:37<5:05:40,  4.34s/it]


訓練次數770，總回報47.19003115264791


 16%|█████▌                              | 780/5000 [1:25:24<6:48:30,  5.81s/it]


訓練次數780，總回報47.00621118012415


 16%|█████▋                              | 790/5000 [1:26:09<5:09:38,  4.41s/it]


訓練次數790，總回報0.3671140939597417


 16%|█████▊                              | 800/5000 [1:26:57<6:03:17,  5.19s/it]


訓練次數800，總回報56.840579710144844


 16%|█████▊                              | 810/5000 [1:27:40<6:06:19,  5.25s/it]


訓練次數810，總回報18.096385542168676


 16%|█████▉                              | 820/5000 [1:28:29<7:47:44,  6.71s/it]


訓練次數820，總回報45.182334384857995


 17%|█████▉                              | 830/5000 [1:29:17<4:56:59,  4.27s/it]


訓練次數830，總回報-0.06913183279743196


 17%|██████                              | 840/5000 [1:29:53<4:23:55,  3.81s/it]


訓練次數840，總回報4.852688172043023


 17%|██████                              | 850/5000 [1:30:33<5:10:54,  4.50s/it]


訓練次數850，總回報52.846416382252485


 17%|██████▏                             | 860/5000 [1:31:18<5:45:56,  5.01s/it]


訓練次數860，總回報53.97222222222212


 17%|██████▎                             | 870/5000 [1:32:04<5:22:24,  4.68s/it]


訓練次數870，總回報55.3758865248226


 18%|██████▎                             | 880/5000 [1:32:41<5:03:41,  4.42s/it]


訓練次數880，總回報52.18918918918911


 18%|██████▍                             | 890/5000 [1:33:19<4:41:38,  4.11s/it]


訓練次數890，總回報51.12292358803977


 18%|██████▎                            | 900/5000 [1:34:16<10:45:37,  9.45s/it]


訓練次數900，總回報-94.99999999999898


 18%|██████▎                            | 910/5000 [1:35:23<11:55:43, 10.50s/it]


訓練次數910，總回報-94.999999999999


 18%|██████▍                            | 920/5000 [1:36:28<11:34:49, 10.22s/it]


訓練次數920，總回報-94.999999999999


 19%|██████▋                             | 930/5000 [1:37:13<5:19:09,  4.71s/it]


訓練次數930，總回報40.92479108635091


 19%|██████▊                             | 940/5000 [1:37:52<4:21:24,  3.86s/it]


訓練次數940，總回報7.061736334405156


 19%|██████▊                             | 950/5000 [1:38:36<5:11:19,  4.61s/it]


訓練次數950，總回報53.61739130434775


 19%|██████▋                            | 960/5000 [1:39:34<10:35:03,  9.43s/it]


訓練次數960，總回報-94.99999999999895


 19%|██████▉                             | 970/5000 [1:40:19<5:26:46,  4.87s/it]


訓練次數970，總回報49.093247588424376


 20%|███████                             | 980/5000 [1:40:57<5:21:58,  4.81s/it]


訓練次數980，總回報53.97222222222212


 20%|███████▏                            | 990/5000 [1:41:38<5:00:26,  4.50s/it]


訓練次數990，總回報50.49999999999993


 20%|███████                            | 1000/5000 [1:42:17<5:26:27,  4.90s/it]


訓練次數1000，總回報110.51387900355905


 20%|███████                            | 1010/5000 [1:42:59<4:54:26,  4.43s/it]


訓練次數1010，總回報55.61565836298924


 20%|███████▏                           | 1020/5000 [1:43:45<5:48:16,  5.25s/it]


訓練次數1020，總回報135.93465703971142


 21%|███████▏                           | 1030/5000 [1:44:28<5:05:24,  4.62s/it]


訓練次數1030，總回報54.20209059233438


 21%|███████▎                           | 1040/5000 [1:45:11<5:38:13,  5.12s/it]


訓練次數1040，總回報107.64212454212478


 21%|███████▎                           | 1050/5000 [1:45:53<5:53:08,  5.36s/it]


訓練次數1050，總回報118.76363636363652


 21%|███████▍                           | 1060/5000 [1:46:33<5:13:13,  4.77s/it]


訓練次數1060，總回報49.093247588424376


 21%|███████▍                           | 1070/5000 [1:47:12<5:02:46,  4.62s/it]


訓練次數1070，總回報45.182334384857995


 22%|███████▌                           | 1080/5000 [1:47:52<5:14:14,  4.81s/it]


訓練次數1080，總回報105.1712177121774


 22%|███████▋                           | 1090/5000 [1:48:31<5:06:46,  4.71s/it]


訓練次數1090，總回報43.882352941176414


 22%|███████▋                           | 1100/5000 [1:49:10<4:39:44,  4.30s/it]


訓練次數1100，總回報57.8980694980694


 22%|███████▊                           | 1110/5000 [1:49:52<5:01:23,  4.65s/it]


訓練次數1110，總回報53.06849315068485


 22%|███████▊                           | 1120/5000 [1:50:29<4:35:28,  4.26s/it]


訓練次數1120，總回報55.3758865248226


 23%|███████▉                           | 1130/5000 [1:51:05<4:21:06,  4.05s/it]


訓練次數1130，總回報53.06849315068485


 23%|███████▉                           | 1140/5000 [1:51:45<5:16:45,  4.92s/it]


訓練次數1140，總回報44.2130177514792


 23%|████████                           | 1150/5000 [1:52:28<5:08:01,  4.80s/it]


訓練次數1150，總回報56.59205776173278


 23%|████████                           | 1160/5000 [1:53:06<5:00:53,  4.70s/it]


訓練次數1160，總回報121.87551020408185


 23%|████████▏                          | 1170/5000 [1:53:50<5:32:16,  5.21s/it]


訓練次數1170，總回報59.969696969696884


 24%|████████▎                          | 1180/5000 [1:54:27<4:02:02,  3.80s/it]


訓練次數1180，總回報4.552961672473878


 24%|████████▎                          | 1190/5000 [1:54:59<4:24:48,  4.17s/it]


訓練次數1190，總回報105.41772151898749


 24%|████████▍                          | 1200/5000 [1:55:31<4:06:12,  3.89s/it]


訓練次數1200，總回報49.83344709897604


 24%|████████▍                          | 1210/5000 [1:56:12<5:06:09,  4.85s/it]


訓練次數1210，總回報56.34532374100712


 24%|████████▌                          | 1220/5000 [1:56:56<6:07:17,  5.83s/it]


訓練次數1220，總回報141.46877323420122


 25%|████████▌                          | 1230/5000 [1:57:37<5:07:39,  4.90s/it]


訓練次數1230，總回報116.64545454545484


 25%|████████▋                          | 1240/5000 [1:58:14<4:29:03,  4.29s/it]


訓練次數1240，總回報53.06849315068485


 25%|████████▊                          | 1250/5000 [1:58:54<4:13:10,  4.05s/it]


訓練次數1250，總回報3.2757396449704217


 25%|████████▊                          | 1260/5000 [1:59:28<4:24:00,  4.24s/it]


訓練次數1260，總回報52.625850340135976


 25%|████████▉                          | 1270/5000 [2:00:11<5:33:18,  5.36s/it]


訓練次數1270，總回報48.89743589743581


 26%|████████▉                          | 1280/5000 [2:00:45<4:24:04,  4.26s/it]


訓練次數1280，總回報132.6491349480971


 26%|█████████                          | 1290/5000 [2:01:27<4:33:17,  4.42s/it]


訓練次數1290，總回報63.99999999999997


 26%|█████████                          | 1300/5000 [2:02:09<5:33:15,  5.40s/it]


訓練次數1300，總回報49.00606060606055


 26%|█████████▏                         | 1310/5000 [2:02:46<4:32:08,  4.43s/it]


訓練次數1310，總回報51.33333333333326


 26%|█████████▏                         | 1320/5000 [2:03:25<4:26:13,  4.34s/it]


訓練次數1320，總回報52.40677966101684


 27%|█████████▎                         | 1330/5000 [2:04:10<4:41:08,  4.60s/it]


訓練次數1330，總回報58.37037037037027


 27%|█████████▍                         | 1340/5000 [2:04:48<4:23:27,  4.32s/it]


訓練次數1340，總回報49.48867313915849


 27%|█████████▍                         | 1350/5000 [2:05:29<4:52:40,  4.81s/it]


訓練次數1350，總回報61.076923076923016


 27%|█████████▌                         | 1360/5000 [2:06:07<4:24:10,  4.35s/it]


訓練次數1360，總回報48.8026845637583


 27%|█████████▌                         | 1370/5000 [2:06:47<4:23:21,  4.35s/it]


訓練次數1370，總回報56.10035842293898


 28%|█████████▋                         | 1380/5000 [2:07:26<4:50:58,  4.82s/it]


訓練次數1380，總回報47.936908517350105


 28%|█████████▋                         | 1390/5000 [2:08:08<4:59:17,  4.97s/it]


訓練次數1390，總回報119.10000000000025


 28%|█████████▊                         | 1400/5000 [2:08:44<4:43:31,  4.73s/it]


訓練次數1400，總回報128.02014652014685


 28%|█████████▊                         | 1410/5000 [2:09:26<5:38:04,  5.65s/it]


訓練次數1410，總回報107.80000000000017


 28%|█████████▉                         | 1420/5000 [2:10:04<4:19:09,  4.34s/it]


訓練次數1420，總回報53.29209621993119


 29%|██████████                         | 1430/5000 [2:10:43<4:28:15,  4.51s/it]


訓練次數1430，總回報45.57575757575751


 29%|██████████                         | 1440/5000 [2:11:21<4:42:38,  4.76s/it]


訓練次數1440，總回報59.69811320754708


 29%|██████████▏                        | 1450/5000 [2:12:01<4:37:08,  4.68s/it]


訓練次數1450，總回報52.846416382252485


 29%|██████████▏                        | 1460/5000 [2:12:43<4:32:37,  4.62s/it]


訓練次數1460，總回報61.92996108949412


 29%|██████████▎                        | 1470/5000 [2:13:24<4:43:17,  4.82s/it]


訓練次數1470，總回報103.4000000000002


 30%|██████████▎                        | 1480/5000 [2:14:05<4:30:11,  4.61s/it]


訓練次數1480，總回報43.23255813953481


 30%|██████████▍                        | 1490/5000 [2:14:41<3:41:03,  3.78s/it]


訓練次數1490，總回報4.203921568627461


 30%|██████████▌                        | 1500/5000 [2:15:14<3:39:10,  3.76s/it]


訓練次數1500，總回報22.529629629629607


 30%|██████████▌                        | 1510/5000 [2:15:47<3:11:15,  3.29s/it]


訓練次數1510，總回報4.780622837370252


 30%|██████████▋                        | 1520/5000 [2:16:21<4:02:23,  4.18s/it]


訓練次數1520，總回報45.40181268882168


 31%|██████████▋                        | 1530/5000 [2:16:55<4:15:27,  4.42s/it]


訓練次數1530，總回報49.29032258064509


 31%|██████████▊                        | 1540/5000 [2:17:36<4:30:54,  4.70s/it]


訓練次數1540，總回報47.19003115264791


 31%|██████████▊                        | 1550/5000 [2:18:20<5:23:57,  5.63s/it]


訓練次數1550，總回報112.87248322147678


 31%|██████████▉                        | 1560/5000 [2:18:54<3:19:38,  3.48s/it]


訓練次數1560，總回報12.79260450160773


 31%|██████████▉                        | 1570/5000 [2:19:37<4:40:58,  4.92s/it]


訓練次數1570，總回報24.26245954692554


 32%|███████████                        | 1580/5000 [2:20:13<4:57:59,  5.23s/it]


訓練次數1580，總回報149.56722689075667


 32%|███████████▏                       | 1590/5000 [2:20:53<4:35:15,  4.84s/it]


訓練次數1590，總回報47.333566433566254


 32%|███████████▏                       | 1600/5000 [2:21:31<4:28:38,  4.74s/it]


訓練次數1600，總回報86.75853658536587


 32%|███████████▎                       | 1610/5000 [2:22:04<3:15:54,  3.47s/it]


訓練次數1610，總回報3.454140127388542


 32%|███████████▎                       | 1620/5000 [2:22:42<4:44:00,  5.04s/it]


訓練次數1620，總回報54.20209059233438


 33%|███████████▍                       | 1630/5000 [2:23:25<4:51:21,  5.19s/it]


訓練次數1630，總回報49.337809187278964


 33%|███████████▍                       | 1640/5000 [2:24:01<4:00:17,  4.29s/it]


訓練次數1640，總回報47.00621118012415


 33%|███████████▌                       | 1650/5000 [2:24:45<4:34:29,  4.92s/it]


訓練次數1650，總回報62.50980392156858


 33%|███████████▌                       | 1660/5000 [2:25:27<4:51:31,  5.24s/it]


訓練次數1660，總回報56.59205776173278


 33%|███████████▋                       | 1670/5000 [2:26:05<4:00:01,  4.32s/it]


訓練次數1670，總回報48.126582278480925


 34%|███████████▊                       | 1680/5000 [2:26:46<4:33:38,  4.95s/it]


訓練次數1680，總回報59.42857142857131


 34%|███████████▊                       | 1690/5000 [2:27:28<4:33:07,  4.95s/it]


訓練次數1690，總回報47.389189189189054


 34%|███████████▉                       | 1700/5000 [2:28:14<4:23:05,  4.78s/it]


訓練次數1700，總回報47.4163934426229


 34%|███████████▉                       | 1710/5000 [2:28:57<5:27:35,  5.97s/it]


訓練次數1710，總回報105.22738853503242


 34%|████████████                       | 1720/5000 [2:29:38<4:12:25,  4.62s/it]


訓練次數1720，總回報50.913907284768115


 35%|████████████                       | 1730/5000 [2:30:29<6:31:37,  7.19s/it]


訓練次數1730，總回報100.15719063545225


 35%|████████████▏                      | 1740/5000 [2:31:11<4:46:54,  5.28s/it]


訓練次數1740，總回報38.1026845637582


 35%|████████████▎                      | 1750/5000 [2:31:46<3:47:14,  4.20s/it]


訓練次數1750，總回報45.907987220447204


 35%|████████████▎                      | 1760/5000 [2:32:32<6:13:58,  6.93s/it]


訓練次數1760，總回報131.9762589928062


 35%|████████████▍                      | 1770/5000 [2:33:13<4:43:34,  5.27s/it]


訓練次數1770，總回報43.60606060606049


 36%|████████████▍                      | 1780/5000 [2:34:02<5:48:33,  6.49s/it]


訓練次數1780，總回報97.42337662337691


 36%|████████████▌                      | 1790/5000 [2:34:48<4:35:36,  5.15s/it]


訓練次數1790，總回報57.831970260222946


 36%|████████████▌                      | 1800/5000 [2:35:34<4:45:25,  5.35s/it]


訓練次數1800，總回報53.97222222222212


 36%|████████████▋                      | 1810/5000 [2:36:16<4:07:33,  4.66s/it]


訓練次數1810，總回報57.09090909090898


 36%|████████████▋                      | 1820/5000 [2:37:06<5:23:37,  6.11s/it]


訓練次數1820，總回報114.0206185567014


 37%|████████████▊                      | 1830/5000 [2:37:49<4:18:02,  4.88s/it]


訓練次數1830，總回報50.255670103092704


 37%|████████████▉                      | 1840/5000 [2:38:28<4:03:19,  4.62s/it]


訓練次數1840，總回報49.624489795918294


 37%|████████████▉                      | 1850/5000 [2:39:16<4:38:15,  5.30s/it]


訓練次數1850，總回報50.29508196721306


 37%|█████████████                      | 1860/5000 [2:39:59<4:29:16,  5.15s/it]


訓練次數1860，總回報49.073063973063874


 37%|█████████████                      | 1870/5000 [2:40:38<3:43:01,  4.28s/it]


訓練次數1870，總回報48.00264900662243


 38%|█████████████▏                     | 1880/5000 [2:41:19<3:39:29,  4.22s/it]


訓練次數1880，總回報55.61565836298924


 38%|█████████████▏                     | 1890/5000 [2:42:06<4:32:50,  5.26s/it]


訓練次數1890，總回報49.24383561643827


 38%|█████████████▎                     | 1900/5000 [2:42:57<5:16:52,  6.13s/it]


訓練次數1900，總回報46.65242718446594


 38%|█████████████▎                     | 1910/5000 [2:43:41<4:14:34,  4.94s/it]


訓練次數1910，總回報42.61686746987944


 38%|█████████████▍                     | 1920/5000 [2:44:28<5:24:52,  6.33s/it]


訓練次數1920，總回報41.389189189189


 39%|█████████████▌                     | 1930/5000 [2:45:07<3:57:45,  4.65s/it]


訓練次數1930，總回報50.73333333333325


 39%|█████████████▌                     | 1940/5000 [2:45:53<4:26:36,  5.23s/it]


訓練次數1940，總回報52.18918918918911


 39%|█████████████▋                     | 1950/5000 [2:46:35<4:15:02,  5.02s/it]


訓練次數1950，總回報45.57575757575751


 39%|█████████████▋                     | 1960/5000 [2:47:12<3:33:11,  4.21s/it]


訓練次數1960，總回報43.445871559632955


 39%|█████████████▊                     | 1970/5000 [2:47:53<3:20:24,  3.97s/it]


訓練次數1970，總回報21.05399361022363


 40%|█████████████▊                     | 1980/5000 [2:48:25<3:34:36,  4.26s/it]


訓練次數1980，總回報54.44306569343051


 40%|█████████████▉                     | 1990/5000 [2:49:02<3:34:41,  4.28s/it]


訓練次數1990，總回報49.495081967213046


 40%|██████████████                     | 2000/5000 [2:49:40<3:27:53,  4.16s/it]


訓練次數2000，總回報45.40181268882168


 40%|██████████████                     | 2010/5000 [2:50:13<2:53:56,  3.49s/it]


訓練次數2010，總回報20.650980392156846


 40%|██████████████▏                    | 2020/5000 [2:50:48<3:22:10,  4.07s/it]


訓練次數2020，總回報14.77902097902099


 41%|██████████████▏                    | 2030/5000 [2:51:22<4:16:22,  5.18s/it]


訓練次數2030，總回報47.925850340135916


 41%|██████████████▎                    | 2040/5000 [2:51:58<3:04:15,  3.73s/it]


訓練次數2040，總回報7.103225806451624


 41%|██████████████▎                    | 2050/5000 [2:52:32<3:03:30,  3.73s/it]


訓練次數2050，總回報7.357894736842114


 41%|██████████████▍                    | 2060/5000 [2:53:03<2:39:30,  3.26s/it]


訓練次數2060，總回報8.992753623188419


 41%|██████████████▍                    | 2070/5000 [2:53:37<2:47:04,  3.42s/it]


訓練次數2070，總回報7.614754098360666


 42%|██████████████▌                    | 2080/5000 [2:54:09<3:49:53,  4.72s/it]


訓練次數2080，總回報123.25714285714304


 42%|██████████████▋                    | 2090/5000 [2:54:45<3:30:47,  4.35s/it]


訓練次數2090，總回報43.07246376811586


 42%|██████████████▋                    | 2100/5000 [2:55:25<3:59:29,  4.96s/it]


訓練次數2100，總回報51.12292358803977


 42%|██████████████▊                    | 2110/5000 [2:56:06<4:08:03,  5.15s/it]


訓練次數2110，總回報48.702875399360934


 42%|██████████████▊                    | 2120/5000 [2:56:45<3:55:13,  4.90s/it]


訓練次數2120，總回報51.75838926174487


 43%|██████████████▉                    | 2130/5000 [2:57:24<3:51:20,  4.84s/it]


訓練次數2130，總回報48.20066445182715


 43%|██████████████▉                    | 2140/5000 [2:58:00<3:35:11,  4.51s/it]


訓練次數2140，總回報60.1424460431654


 43%|███████████████                    | 2150/5000 [2:58:38<3:31:43,  4.46s/it]


訓練次數2150，總回報46.282208588957005


 43%|███████████████                    | 2160/5000 [2:59:17<3:43:44,  4.73s/it]


訓練次數2160，總回報122.18776978417286


 43%|███████████████▏                   | 2170/5000 [2:59:57<3:38:41,  4.64s/it]


訓練次數2170，總回報56.10035842293898


 44%|███████████████▎                   | 2180/5000 [3:00:40<4:02:59,  5.17s/it]


訓練次數2180，總回報115.46164383561673


 44%|███████████████▎                   | 2190/5000 [3:01:19<3:38:16,  4.66s/it]


訓練次數2190，總回報57.34306569343055


 44%|███████████████▍                   | 2200/5000 [3:01:56<3:23:16,  4.36s/it]


訓練次數2200，總回報50.913907284768115


 44%|███████████████▍                   | 2210/5000 [3:02:37<3:27:28,  4.46s/it]


訓練次數2210，總回報45.40181268882168


 44%|███████████████▌                   | 2220/5000 [3:03:16<3:38:20,  4.71s/it]


訓練次數2220，總回報64.92307692307692


 45%|███████████████▌                   | 2230/5000 [3:03:59<3:47:23,  4.93s/it]


訓練次數2230，總回報57.85294117647052


 45%|███████████████▋                   | 2240/5000 [3:04:41<4:17:42,  5.60s/it]


訓練次數2240，總回報43.172222222222025


 45%|███████████████▊                   | 2250/5000 [3:05:25<2:55:43,  3.83s/it]


訓練次數2250，總回報14.358083832335343


 45%|███████████████▊                   | 2260/5000 [3:06:01<3:26:51,  4.53s/it]


訓練次數2260，總回報54.90140845070416


 45%|███████████████▉                   | 2270/5000 [3:06:46<3:36:49,  4.77s/it]


訓練次數2270，總回報60.24334600760446


 46%|███████████████▉                   | 2280/5000 [3:07:28<3:27:11,  4.57s/it]


訓練次數2280，總回報56.092057761732775


 46%|████████████████                   | 2290/5000 [3:08:09<3:43:21,  4.95s/it]


訓練次數2290，總回報37.84545454545444


 46%|████████████████                   | 2300/5000 [3:08:50<3:20:09,  4.45s/it]


訓練次數2300，總回報55.85714285714276


 46%|████████████████▏                  | 2310/5000 [3:09:27<3:12:31,  4.29s/it]


訓練次數2310，總回報50.15107913669058


 46%|████████████████▏                  | 2320/5000 [3:10:09<3:32:00,  4.75s/it]


訓練次數2320，總回報59.07692307692296


 47%|████████████████▎                  | 2330/5000 [3:10:49<3:25:11,  4.61s/it]


訓練次數2330，總回報48.60066889632099


 47%|████████████████▍                  | 2340/5000 [3:11:30<3:35:15,  4.86s/it]


訓練次數2340，總回報51.545150501672154


 47%|████████████████▍                  | 2350/5000 [3:12:10<3:15:48,  4.43s/it]


訓練次數2350，總回報52.6857142857142


 47%|████████████████▌                  | 2360/5000 [3:12:47<2:53:24,  3.94s/it]


訓練次數2360，總回報14.906853582554515


 47%|████████████████▌                  | 2370/5000 [3:13:28<3:47:30,  5.19s/it]


訓練次數2370，總回報44.84515050167208


 48%|████████████████▋                  | 2380/5000 [3:14:13<4:03:21,  5.57s/it]


訓練次數2380，總回報43.882352941176414


 48%|████████████████▋                  | 2390/5000 [3:14:51<3:25:37,  4.73s/it]


訓練次數2390，總回報100.37072243346034


 48%|████████████████▊                  | 2400/5000 [3:15:31<3:04:53,  4.27s/it]


訓練次數2400，總回報17.461403508771916


 48%|████████████████▊                  | 2410/5000 [3:16:19<4:05:18,  5.68s/it]


訓練次數2410，總回報56.34532374100712


 48%|████████████████▉                  | 2420/5000 [3:16:58<3:08:09,  4.38s/it]


訓練次數2420，總回報27.56557377049178


 49%|█████████████████                  | 2430/5000 [3:17:42<3:51:52,  5.41s/it]


訓練次數2430，總回報104.07142857142894


 49%|█████████████████                  | 2440/5000 [3:18:14<3:00:31,  4.23s/it]


訓練次數2440，總回報11.24137931034484


 49%|█████████████████▏                 | 2450/5000 [3:18:55<3:28:02,  4.90s/it]


訓練次數2450，總回報47.39324758842435


 49%|█████████████████▏                 | 2460/5000 [3:19:28<2:42:12,  3.83s/it]


訓練次數2460，總回報44.02755417956649


 49%|█████████████████▎                 | 2470/5000 [3:20:05<2:22:14,  3.37s/it]


訓練次數2470，總回報4.780622837370254


 50%|█████████████████▎                 | 2480/5000 [3:20:53<3:46:53,  5.40s/it]


訓練次數2480，總回報24.65806451612901


 50%|█████████████████▍                 | 2490/5000 [3:21:42<4:33:16,  6.53s/it]


訓練次數2490，總回報35.10377358490552


 50%|█████████████████▌                 | 2500/5000 [3:22:41<5:21:38,  7.72s/it]


訓練次數2500，總回報105.64117647058903


 50%|█████████████████▌                 | 2510/5000 [3:23:25<3:19:15,  4.80s/it]


訓練次數2510，總回報49.643835616438274


 50%|█████████████████▋                 | 2520/5000 [3:24:14<3:38:01,  5.27s/it]


訓練次數2520，總回報48.24394463667806


 51%|█████████████████▋                 | 2530/5000 [3:25:02<3:39:15,  5.33s/it]


訓練次數2530，總回報51.12292358803977


 51%|█████████████████▊                 | 2540/5000 [3:25:49<3:41:35,  5.40s/it]


訓練次數2540，總回報46.81724137931018


 51%|█████████████████▊                 | 2550/5000 [3:26:37<4:14:33,  6.23s/it]


訓練次數2550，總回報36.232558139534746


 51%|█████████████████▉                 | 2560/5000 [3:27:23<3:59:18,  5.88s/it]


訓練次數2560，總回報37.233447098975915


 51%|█████████████████▉                 | 2570/5000 [3:28:29<5:24:50,  8.02s/it]


訓練次數2570，總回報72.3118811881189


 52%|██████████████████                 | 2580/5000 [3:29:12<3:23:49,  5.05s/it]


訓練次數2580，總回報15.63589743589744


 52%|██████████████████▏                | 2590/5000 [3:29:58<4:26:56,  6.65s/it]


訓練次數2590，總回報132.75533596838002


 52%|██████████████████▏                | 2600/5000 [3:30:45<3:54:55,  5.87s/it]


訓練次數2600，總回報112.74754098360691


 52%|██████████████████▎                | 2610/5000 [3:31:36<4:35:24,  6.91s/it]


訓練次數2610，總回報43.845150501672066


 52%|██████████████████▎                | 2620/5000 [3:32:23<3:46:06,  5.70s/it]


訓練次數2620，總回報28.964516129032063


 53%|██████████████████▍                | 2630/5000 [3:33:16<4:27:51,  6.78s/it]


訓練次數2630，總回報39.18918918918899


 53%|██████████████████▍                | 2640/5000 [3:34:11<4:13:21,  6.44s/it]


訓練次數2640，總回報107.89249011857763


 53%|██████████████████▌                | 2650/5000 [3:35:03<4:01:58,  6.18s/it]


訓練次數2650，總回報42.4006644518271


 53%|██████████████████▌                | 2660/5000 [3:35:59<3:55:29,  6.04s/it]


訓練次數2660，總回報84.6745762711866


 53%|██████████████████▋                | 2670/5000 [3:36:39<3:31:54,  5.46s/it]


訓練次數2670，總回報32.07142857142841


 54%|██████████████████▊                | 2680/5000 [3:37:15<2:16:27,  3.53s/it]


訓練次數2680，總回報2.9206896551724166


 54%|██████████████████▊                | 2690/5000 [3:38:02<4:35:03,  7.14s/it]


訓練次數2690，總回報50.792057761732714


 54%|██████████████████▉                | 2700/5000 [3:38:39<3:03:19,  4.78s/it]


訓練次數2700，總回報42.141558441558296


 54%|██████████████████▉                | 2710/5000 [3:39:34<4:41:21,  7.37s/it]


訓練次數2710，總回報42.34306569343042


 54%|███████████████████                | 2720/5000 [3:40:20<3:21:01,  5.29s/it]


訓練次數2720，總回報19.117228464419448


 55%|███████████████████                | 2730/5000 [3:41:00<2:06:05,  3.33s/it]


訓練次數2730，總回報4.476156583629907


 55%|███████████████████▏               | 2740/5000 [3:41:37<3:34:52,  5.70s/it]


訓練次數2740，總回報90.12727272727295


 55%|███████████████████▎               | 2750/5000 [3:42:27<3:50:08,  6.14s/it]


訓練次數2750，總回報36.88925081433206


 55%|███████████████████▎               | 2760/5000 [3:43:23<4:25:14,  7.10s/it]


訓練次數2760，總回報43.4920962199311


 55%|███████████████████▍               | 2770/5000 [3:44:07<3:28:43,  5.62s/it]


訓練次數2770，總回報42.40627062706252


 56%|███████████████████▍               | 2780/5000 [3:44:47<2:37:52,  4.27s/it]


訓練次數2780，總回報11.772289156626519


 56%|███████████████████▌               | 2790/5000 [3:45:30<2:47:24,  4.55s/it]


訓練次數2790，總回報50.70627062706261


 56%|███████████████████▌               | 2800/5000 [3:46:15<3:23:25,  5.55s/it]


訓練次數2800，總回報110.53860182370865


 56%|███████████████████▋               | 2810/5000 [3:46:56<2:15:46,  3.72s/it]


訓練次數2810，總回報10.701650165016515


 56%|███████████████████▋               | 2820/5000 [3:47:31<2:41:47,  4.45s/it]


訓練次數2820，總回報51.75838926174487


 57%|███████████████████▊               | 2830/5000 [3:48:10<2:49:28,  4.69s/it]


訓練次數2830，總回報44.474766355140126


 57%|███████████████████▉               | 2840/5000 [3:48:52<2:51:00,  4.75s/it]


訓練次數2840，總回報47.74842767295593


 57%|███████████████████▉               | 2850/5000 [3:49:30<2:41:25,  4.50s/it]


訓練次數2850，總回報55.3758865248226


 57%|████████████████████               | 2860/5000 [3:50:08<2:47:21,  4.69s/it]


訓練次數2860，總回報42.29221556886221


 57%|████████████████████               | 2870/5000 [3:50:46<2:40:10,  4.51s/it]


訓練次數2870，總回報52.625850340135976


 58%|████████████████████▏              | 2880/5000 [3:51:27<3:02:41,  5.17s/it]


訓練次數2880，總回報102.65910780669165


 58%|████████████████████▏              | 2890/5000 [3:52:08<2:49:29,  4.82s/it]


訓練次數2890，總回報53.74394463667813


 58%|████████████████████▎              | 2900/5000 [3:52:51<2:44:57,  4.71s/it]


訓練次數2900，總回報59.69811320754708


 58%|████████████████████▎              | 2910/5000 [3:53:30<2:41:35,  4.64s/it]


訓練次數2910，總回報50.29508196721306


 58%|████████████████████▍              | 2920/5000 [3:54:12<2:56:50,  5.10s/it]


訓練次數2920，總回報109.35098039215714


 59%|████████████████████▌              | 2930/5000 [3:54:58<3:09:11,  5.48s/it]


訓練次數2930，總回報137.61641791044812


 59%|████████████████████▌              | 2940/5000 [3:55:53<5:17:08,  9.24s/it]


訓練次數2940，總回報-94.99999999999898


 59%|████████████████████▋              | 2950/5000 [3:56:51<5:00:09,  8.79s/it]


訓練次數2950，總回報59.69811320754708


 59%|████████████████████▋              | 2960/5000 [3:57:30<2:38:45,  4.67s/it]


訓練次數2960，總回報50.49999999999993


 59%|████████████████████▊              | 2970/5000 [3:58:10<2:39:54,  4.73s/it]


訓練次數2970，總回報100.34137931034503


 60%|████████████████████▊              | 2980/5000 [3:58:52<2:44:19,  4.88s/it]


訓練次數2980，總回報106.99550561797773


 60%|████████████████████▉              | 2990/5000 [3:59:31<2:38:13,  4.72s/it]


訓練次數2990，總回報152.2750000000004


 60%|█████████████████████              | 3000/5000 [4:00:16<2:39:42,  4.79s/it]


訓練次數3000，總回報53.61739130434775


 60%|█████████████████████              | 3010/5000 [4:00:56<2:30:47,  4.55s/it]


訓練次數3010，總回報52.18918918918911


 60%|█████████████████████▏             | 3020/5000 [4:01:35<2:22:45,  4.33s/it]


訓練次數3020，總回報45.362025316455615


 61%|█████████████████████▏             | 3030/5000 [4:02:11<2:19:56,  4.26s/it]


訓練次數3030，總回報56.59205776173278


 61%|█████████████████████▎             | 3040/5000 [4:02:54<2:39:51,  4.89s/it]


訓練次數3040，總回報51.33333333333326


 61%|█████████████████████▎             | 3050/5000 [4:03:38<2:57:35,  5.46s/it]


訓練次數3050，總回報55.61565836298924


 61%|█████████████████████▍             | 3060/5000 [4:04:10<1:38:54,  3.06s/it]


訓練次數3060，總回報5.038297872340436


 61%|█████████████████████▍             | 3070/5000 [4:04:45<2:26:05,  4.54s/it]


訓練次數3070，總回報58.63197026022296


 62%|█████████████████████▌             | 3080/5000 [4:05:22<2:21:37,  4.43s/it]


訓練次數3080，總回報51.12292358803977


 62%|█████████████████████▋             | 3090/5000 [4:05:53<1:48:13,  3.40s/it]


訓練次數3090，總回報3.2020771513353177


 62%|█████████████████████▋             | 3100/5000 [4:06:25<2:16:09,  4.30s/it]


訓練次數3100，總回報58.11070110701097


 62%|█████████████████████▊             | 3110/5000 [4:07:04<2:29:03,  4.73s/it]


訓練次數3110，總回報50.791482649842216


 62%|█████████████████████▊             | 3120/5000 [4:07:42<2:27:39,  4.71s/it]


訓練次數3120，總回報91.88025477707018


 63%|█████████████████████▉             | 3130/5000 [4:08:22<2:18:08,  4.43s/it]


訓練次數3130，總回報59.969696969696884


 63%|█████████████████████▉             | 3140/5000 [4:09:01<2:23:20,  4.62s/it]


訓練次數3140，總回報49.722033898305


 63%|██████████████████████             | 3150/5000 [4:09:39<2:19:32,  4.53s/it]


訓練次數3150，總回報45.05705705705697


 63%|██████████████████████             | 3160/5000 [4:10:16<2:10:39,  4.26s/it]


訓練次數3160，總回報47.223529411764645


 63%|██████████████████████▏            | 3170/5000 [4:10:49<2:12:33,  4.35s/it]


訓練次數3170，總回報101.1555555555558


 64%|██████████████████████▎            | 3180/5000 [4:11:26<2:18:06,  4.55s/it]


訓練次數3180，總回報43.445871559632955


 64%|██████████████████████▎            | 3190/5000 [4:12:05<2:21:13,  4.68s/it]


訓練次數3190，總回報51.545150501672154


 64%|██████████████████████▍            | 3200/5000 [4:12:41<1:51:31,  3.72s/it]


訓練次數3200，總回報10.229032258064526


 64%|██████████████████████▍            | 3210/5000 [4:13:17<2:08:30,  4.31s/it]


訓練次數3210，總回報50.29508196721306


 64%|██████████████████████▌            | 3220/5000 [4:13:54<2:07:17,  4.29s/it]


訓練次數3220，總回報49.093247588424376


 65%|██████████████████████▌            | 3230/5000 [4:14:31<2:04:07,  4.21s/it]


訓練次數3230，總回報54.09343065693421


 65%|██████████████████████▋            | 3240/5000 [4:15:07<1:54:49,  3.91s/it]


訓練次數3240，總回報21.420779220779202


 65%|██████████████████████▊            | 3250/5000 [4:15:45<2:22:44,  4.89s/it]


訓練次數3250，總回報121.82264150943413


 65%|██████████████████████▊            | 3260/5000 [4:16:24<2:12:37,  4.57s/it]


訓練次數3260，總回報53.74394463667813


 65%|██████████████████████▉            | 3270/5000 [4:17:04<2:09:34,  4.49s/it]


訓練次數3270，總回報28.283177570093418


 66%|██████████████████████▉            | 3280/5000 [4:17:41<2:00:21,  4.20s/it]


訓練次數3280，總回報52.34394463667811


 66%|███████████████████████            | 3290/5000 [4:18:15<1:35:23,  3.35s/it]


訓練次數3290，總回報9.873015873015884


 66%|███████████████████████            | 3300/5000 [4:18:47<1:56:15,  4.10s/it]


訓練次數3300，總回報15.608038585209002


 66%|███████████████████████▏           | 3310/5000 [4:19:27<2:10:32,  4.63s/it]


訓練次數3310，總回報66.18930041152265


 66%|███████████████████████▏           | 3320/5000 [4:20:05<2:15:29,  4.84s/it]


訓練次數3320，總回報104.16819923371679


 67%|███████████████████████▎           | 3330/5000 [4:20:56<2:27:02,  5.28s/it]


訓練次數3330，總回報51.545150501672154


 67%|███████████████████████▍           | 3340/5000 [4:21:39<2:33:34,  5.55s/it]


訓練次數3340，總回報58.966666666666455


 67%|███████████████████████▍           | 3350/5000 [4:22:20<2:06:13,  4.59s/it]


訓練次數3350，總回報48.509554140127314


 67%|███████████████████████▌           | 3360/5000 [4:22:58<1:59:52,  4.39s/it]


訓練次數3360，總回報40.19780219780212


 67%|███████████████████████▌           | 3370/5000 [4:23:36<1:57:10,  4.31s/it]


訓練次數3370，總回報51.75838926174487


 68%|███████████████████████▋           | 3380/5000 [4:24:18<2:06:08,  4.67s/it]


訓練次數3380，總回報46.4615384615384


 68%|███████████████████████▋           | 3390/5000 [4:24:56<1:57:18,  4.37s/it]


訓練次數3390，總回報44.64999999999993


 68%|███████████████████████▊           | 3400/5000 [4:25:34<2:01:55,  4.57s/it]


訓練次數3400，總回報51.545150501672154


 68%|███████████████████████▊           | 3410/5000 [4:26:10<1:48:58,  4.11s/it]


訓練次數3410，總回報51.973063973063915


 68%|███████████████████████▉           | 3420/5000 [4:26:39<1:22:56,  3.15s/it]


訓練次數3420，總回報14.118556701030924


 69%|████████████████████████           | 3430/5000 [4:27:10<1:24:36,  3.23s/it]


訓練次數3430，總回報12.582352941176483


 69%|████████████████████████           | 3440/5000 [4:27:42<1:30:38,  3.49s/it]


訓練次數3440，總回報26.911267605633768


 69%|████████████████████████▏          | 3450/5000 [4:28:16<1:33:37,  3.62s/it]


訓練次數3450，總回報28.131884057970986


 69%|████████████████████████▏          | 3460/5000 [4:28:53<1:58:21,  4.61s/it]


訓練次數3460，總回報50.70627062706261


 69%|████████████████████████▎          | 3470/5000 [4:29:30<1:51:31,  4.37s/it]


訓練次數3470，總回報49.88925081433216


 70%|████████████████████████▎          | 3480/5000 [4:30:05<1:46:52,  4.22s/it]


訓練次數3480，總回報109.68425196850413


 70%|████████████████████████▍          | 3490/5000 [4:30:37<1:43:32,  4.11s/it]


訓練次數3490，總回報21.371428571428556


 70%|████████████████████████▌          | 3500/5000 [4:31:12<1:37:07,  3.88s/it]


訓練次數3500，總回報61.076923076923016


 70%|████████████████████████▌          | 3510/5000 [4:31:46<1:27:35,  3.53s/it]


訓練次數3510，總回報18.24102564102563


 70%|████████████████████████▋          | 3520/5000 [4:32:19<1:30:29,  3.67s/it]


訓練次數3520，總回報11.792682926829281


 71%|████████████████████████▋          | 3530/5000 [4:32:47<1:16:35,  3.13s/it]


訓練次數3530，總回報30.86834532374096


 71%|████████████████████████▊          | 3540/5000 [4:33:16<1:16:49,  3.16s/it]


訓練次數3540，總回報10.456291390728492


 71%|████████████████████████▊          | 3550/5000 [4:33:45<1:31:29,  3.79s/it]


訓練次數3550，總回報102.81830985915508


 71%|████████████████████████▉          | 3560/5000 [4:34:16<1:27:09,  3.63s/it]


訓練次數3560，總回報11.243859649122818


 71%|████████████████████████▉          | 3570/5000 [4:34:47<1:42:44,  4.31s/it]


訓練次數3570，總回報60.82993197278901


 72%|█████████████████████████          | 3580/5000 [4:35:20<1:24:56,  3.59s/it]


訓練次數3580，總回報10.7785234899329


 72%|█████████████████████████▏         | 3590/5000 [4:35:58<1:35:05,  4.05s/it]


訓練次數3590，總回報15.251898734177212


 72%|█████████████████████████▏         | 3600/5000 [4:36:33<1:24:34,  3.62s/it]


訓練次數3600，總回報8.960147601476029


 72%|█████████████████████████▎         | 3610/5000 [4:37:14<1:51:17,  4.80s/it]


訓練次數3610，總回報31.901273885350278


 72%|█████████████████████████▎         | 3620/5000 [4:37:44<1:18:51,  3.43s/it]


訓練次數3620，總回報18.19016393442621


 73%|█████████████████████████▍         | 3630/5000 [4:38:14<1:09:32,  3.05s/it]


訓練次數3630，總回報4.991366906474832


 73%|█████████████████████████▍         | 3640/5000 [4:38:42<1:12:47,  3.21s/it]


訓練次數3640，總回報18.462318840579698


 73%|█████████████████████████▌         | 3650/5000 [4:39:18<1:45:20,  4.68s/it]


訓練次數3650，總回報57.09090909090898


 73%|█████████████████████████▌         | 3660/5000 [4:39:54<1:19:07,  3.54s/it]


訓練次數3660，總回報19.81992337164749


 73%|█████████████████████████▋         | 3670/5000 [4:40:28<1:14:25,  3.36s/it]


訓練次數3670，總回報6.532415902140684


 74%|█████████████████████████▊         | 3680/5000 [4:41:02<1:05:33,  2.98s/it]


訓練次數3680，總回報9.62500000000001


 74%|█████████████████████████▊         | 3690/5000 [4:41:36<1:21:45,  3.74s/it]


訓練次數3690，總回報16.70952380952379


 74%|█████████████████████████▉         | 3700/5000 [4:42:18<1:54:11,  5.27s/it]


訓練次數3700，總回報125.22989690721684


 74%|█████████████████████████▉         | 3710/5000 [4:42:58<1:30:00,  4.19s/it]


訓練次數3710，總回報23.954340836012843


 74%|██████████████████████████         | 3720/5000 [4:43:32<1:28:14,  4.14s/it]


訓練次數3720，總回報57.09090909090898


 75%|██████████████████████████         | 3730/5000 [4:44:11<1:42:03,  4.82s/it]


訓練次數3730，總回報53.14820143884885


 75%|██████████████████████████▏        | 3740/5000 [4:44:54<1:38:04,  4.67s/it]


訓練次數3740，總回報39.58205128205123


 75%|██████████████████████████▎        | 3750/5000 [4:45:29<1:11:17,  3.42s/it]


訓練次數3750，總回報8.193103448275872


 75%|██████████████████████████▎        | 3760/5000 [4:46:07<1:23:06,  4.02s/it]


訓練次數3760，總回報53.94820143884886


 75%|██████████████████████████▍        | 3770/5000 [4:46:45<1:46:18,  5.19s/it]


訓練次數3770，總回報31.77692307692302


 76%|██████████████████████████▍        | 3780/5000 [4:47:22<1:35:41,  4.71s/it]


訓練次數3780，總回報45.11724137931017


 76%|██████████████████████████▌        | 3790/5000 [4:48:04<1:33:35,  4.64s/it]


訓練次數3790，總回報46.54842767295591


 76%|██████████████████████████▌        | 3800/5000 [4:48:48<1:20:54,  4.05s/it]


訓練次數3800，總回報7.357894736842114


 76%|██████████████████████████▋        | 3810/5000 [4:49:39<2:04:39,  6.29s/it]


訓練次數3810，總回報33.70955414012721


 76%|██████████████████████████▋        | 3820/5000 [4:50:26<2:17:26,  6.99s/it]


訓練次數3820，總回報106.06129032258136


 77%|██████████████████████████▊        | 3830/5000 [4:50:57<1:03:07,  3.24s/it]


訓練次數3830，總回報18.830258302583015


 77%|██████████████████████████▉        | 3840/5000 [4:51:31<1:34:11,  4.87s/it]


訓練次數3840，總回報46.299999999999876


 77%|██████████████████████████▉        | 3850/5000 [4:52:07<1:28:44,  4.63s/it]


訓練次數3850，總回報53.97222222222212


 77%|███████████████████████████        | 3860/5000 [4:52:41<1:09:51,  3.68s/it]


訓練次數3860，總回報8.151877133105812


 77%|████████████████████████████▋        | 3870/5000 [4:53:10<56:32,  3.00s/it]


訓練次數3870，總回報12.415942028985523


 78%|███████████████████████████▏       | 3880/5000 [4:53:42<1:03:59,  3.43s/it]


訓練次數3880，總回報27.065034965034936


 78%|███████████████████████████▏       | 3890/5000 [4:54:13<1:03:59,  3.46s/it]


訓練次數3890，總回報24.990974729241856


 78%|███████████████████████████▎       | 3900/5000 [4:54:46<1:17:01,  4.20s/it]


訓練次數3900，總回報53.51724137931026


 78%|███████████████████████████▎       | 3910/5000 [4:55:24<1:26:07,  4.74s/it]


訓練次數3910，總回報40.27319884726217


 78%|███████████████████████████▍       | 3920/5000 [4:55:54<1:00:50,  3.38s/it]


訓練次數3920，總回報13.305019305019306


 79%|███████████████████████████▌       | 3930/5000 [4:56:27<1:11:13,  3.99s/it]


訓練次數3930，總回報49.88925081433216


 79%|█████████████████████████████▏       | 3940/5000 [4:56:56<50:18,  2.85s/it]


訓練次數3940，總回報9.973015873015884


 79%|███████████████████████████▋       | 3950/5000 [4:57:31<1:19:40,  4.55s/it]


訓練次數3950，總回報75.89233449477351


 79%|███████████████████████████▋       | 3960/5000 [4:58:12<1:19:15,  4.57s/it]


訓練次數3960，總回報53.97222222222212


 79%|███████████████████████████▊       | 3970/5000 [4:58:46<1:02:45,  3.66s/it]


訓練次數3970，總回報21.5159609120521


 80%|███████████████████████████▊       | 3980/5000 [4:59:21<1:12:57,  4.29s/it]


訓練次數3980，總回報110.51387900355903


 80%|███████████████████████████▉       | 3990/5000 [5:00:00<1:14:48,  4.44s/it]


訓練次數3990，總回報44.826332288401176


 80%|████████████████████████████       | 4000/5000 [5:00:40<1:15:07,  4.51s/it]


訓練次數4000，總回報21.380866425992757


 80%|████████████████████████████       | 4010/5000 [5:01:16<1:11:14,  4.32s/it]


訓練次數4010，總回報50.3689655172413


 80%|████████████████████████████▏      | 4020/5000 [5:01:51<1:01:10,  3.75s/it]


訓練次數4020，總回報11.826888217522672


 81%|████████████████████████████▏      | 4030/5000 [5:02:28<1:09:52,  4.32s/it]


訓練次數4030，總回報24.16245954692554


 81%|████████████████████████████▎      | 4040/5000 [5:03:05<1:07:21,  4.21s/it]


訓練次數4040，總回報55.3758865248226


 81%|████████████████████████████▎      | 4050/5000 [5:03:40<1:09:08,  4.37s/it]


訓練次數4050，總回報25.81360544217684


 81%|████████████████████████████▍      | 4060/5000 [5:04:17<1:02:39,  4.00s/it]


訓練次數4060，總回報10.66666666666668


 81%|██████████████████████████████       | 4070/5000 [5:04:47<56:46,  3.66s/it]


訓練次數4070，總回報39.62280701754381


 82%|████████████████████████████▌      | 4080/5000 [5:05:21<1:03:28,  4.14s/it]


訓練次數4080，總回報46.10397553516812


 82%|██████████████████████████████▎      | 4090/5000 [5:05:56<59:03,  3.89s/it]


訓練次數4090，總回報33.80462633451953


 82%|████████████████████████████▋      | 4100/5000 [5:06:28<1:00:23,  4.03s/it]


訓練次數4100，總回報55.61565836298924


 82%|████████████████████████████▊      | 4110/5000 [5:07:06<1:09:46,  4.70s/it]


訓練次數4110，總回報47.74842767295593


 82%|██████████████████████████████▍      | 4120/5000 [5:07:42<57:52,  3.95s/it]


訓練次數4120，總回報41.48124999999994


 83%|██████████████████████████████▌      | 4130/5000 [5:08:16<54:34,  3.76s/it]


訓練次數4130，總回報36.911627906976705


 83%|██████████████████████████████▋      | 4140/5000 [5:08:44<44:39,  3.12s/it]


訓練次數4140，總回報17.77552447552447


 83%|██████████████████████████████▋      | 4150/5000 [5:09:13<51:32,  3.64s/it]


訓練次數4150，總回報46.277813504823094


 83%|█████████████████████████████      | 4160/5000 [5:09:53<1:02:14,  4.45s/it]


訓練次數4160，總回報64.52835249042138


 83%|██████████████████████████████▊      | 4170/5000 [5:10:29<59:07,  4.27s/it]


訓練次數4170，總回報55.13780918727903


 84%|██████████████████████████████▉      | 4180/5000 [5:11:00<47:53,  3.50s/it]


訓練次數4180，總回報14.04794520547945


 84%|█████████████████████████████▎     | 4190/5000 [5:11:36<1:01:30,  4.56s/it]


訓練次數4190，總回報56.5818181818181


 84%|█████████████████████████████▍     | 4200/5000 [5:12:15<1:03:37,  4.77s/it]


訓練次數4200，總回報48.509554140127314


 84%|███████████████████████████████▏     | 4210/5000 [5:12:46<49:18,  3.74s/it]


訓練次數4210，總回報27.909815950920226


 84%|███████████████████████████████▏     | 4220/5000 [5:13:23<53:17,  4.10s/it]


訓練次數4220，總回報15.152313167259795


 85%|███████████████████████████████▎     | 4230/5000 [5:13:55<51:18,  4.00s/it]


訓練次數4230，總回報53.38194945848369


 85%|███████████████████████████████▍     | 4240/5000 [5:14:24<36:29,  2.88s/it]


訓練次數4240，總回報10.410276679841912


 85%|███████████████████████████████▍     | 4250/5000 [5:15:02<52:00,  4.16s/it]


訓練次數4250，總回報13.257088122605369


 85%|███████████████████████████████▌     | 4260/5000 [5:15:35<47:55,  3.89s/it]


訓練次數4260，總回報18.5798561151079


 85%|███████████████████████████████▌     | 4270/5000 [5:16:09<52:40,  4.33s/it]


訓練次數4270，總回報56.10035842293898


 86%|███████████████████████████████▋     | 4280/5000 [5:16:43<52:30,  4.38s/it]


訓練次數4280，總回報50.49999999999993


 86%|███████████████████████████████▋     | 4290/5000 [5:17:13<36:52,  3.12s/it]


訓練次數4290，總回報11.526888217522673


 86%|███████████████████████████████▊     | 4300/5000 [5:17:44<38:10,  3.27s/it]


訓練次數4300，總回報9.43209876543211


 86%|███████████████████████████████▉     | 4310/5000 [5:18:14<49:56,  4.34s/it]


訓練次數4310，總回報70.41869688385269


 86%|███████████████████████████████▉     | 4320/5000 [5:18:53<53:16,  4.70s/it]


訓練次數4320，總回報153.62834645669324


 87%|██████████████████████████████▎    | 4330/5000 [5:19:35<1:00:09,  5.39s/it]


訓練次數4330，總回報54.80140845070416


 87%|████████████████████████████████     | 4340/5000 [5:20:13<46:59,  4.27s/it]


訓練次數4340，總回報15.205376344086028


 87%|████████████████████████████████▏    | 4350/5000 [5:20:40<32:27,  3.00s/it]


訓練次數4350，總回報10.277170418006442


 87%|████████████████████████████████▎    | 4360/5000 [5:21:20<48:46,  4.57s/it]


訓練次數4360，總回報54.10209059233438


 87%|████████████████████████████████▎    | 4370/5000 [5:21:56<44:12,  4.21s/it]


訓練次數4370，總回報46.2333333333332


 88%|████████████████████████████████▍    | 4380/5000 [5:22:31<43:44,  4.23s/it]


訓練次數4380，總回報60.51908396946554


 88%|████████████████████████████████▍    | 4390/5000 [5:23:11<47:24,  4.66s/it]


訓練次數4390，總回報50.09150326797379


 88%|████████████████████████████████▌    | 4400/5000 [5:23:53<49:55,  4.99s/it]


訓練次數4400，總回報21.70819672131146


 88%|████████████████████████████████▋    | 4410/5000 [5:24:35<51:45,  5.26s/it]


訓練次數4410，總回報49.29150326797378


 88%|████████████████████████████████▋    | 4420/5000 [5:25:14<36:46,  3.80s/it]


訓練次數4420，總回報16.588505747126426


 89%|████████████████████████████████▊    | 4430/5000 [5:25:42<37:59,  4.00s/it]


訓練次數4430，總回報46.36451612903218


 89%|████████████████████████████████▊    | 4440/5000 [5:26:16<29:44,  3.19s/it]


訓練次數4440，總回報10.5785234899329


 89%|████████████████████████████████▉    | 4450/5000 [5:26:47<38:44,  4.23s/it]


訓練次數4450，總回報52.82857142857124


 89%|███████████████████████████████▏   | 4460/5000 [5:27:46<1:02:15,  6.92s/it]


訓練次數4460，總回報46.217391304347665


 89%|█████████████████████████████████    | 4470/5000 [5:28:23<40:21,  4.57s/it]


訓練次數4470，總回報54.637809187279025


 90%|█████████████████████████████████▏   | 4480/5000 [5:29:02<40:31,  4.68s/it]


訓練次數4480，總回報19.290066225165557


 90%|█████████████████████████████████▏   | 4490/5000 [5:29:35<31:31,  3.71s/it]


訓練次數4490，總回報15.9013029315961


 90%|█████████████████████████████████▎   | 4500/5000 [5:30:23<43:10,  5.18s/it]


訓練次數4500，總回報7.951877133105812


 90%|█████████████████████████████████▎   | 4510/5000 [5:31:03<34:17,  4.20s/it]


訓練次數4510，總回報34.125157232704375


 90%|█████████████████████████████████▍   | 4520/5000 [5:31:34<25:03,  3.13s/it]


訓練次數4520，總回報20.01755725190838


 91%|█████████████████████████████████▌   | 4530/5000 [5:32:16<35:31,  4.54s/it]


訓練次數4530，總回報17.172602739726017


 91%|█████████████████████████████████▌   | 4540/5000 [5:32:59<37:35,  4.90s/it]


訓練次數4540，總回報22.200334448160504


 91%|█████████████████████████████████▋   | 4550/5000 [5:33:41<37:49,  5.04s/it]


訓練次數4550，總回報45.907987220447204


 91%|█████████████████████████████████▋   | 4560/5000 [5:34:17<33:38,  4.59s/it]


訓練次數4560，總回報37.16835443037969


 91%|█████████████████████████████████▊   | 4570/5000 [5:35:11<51:52,  7.24s/it]


訓練次數4570，總回報127.4803149606306


 92%|█████████████████████████████████▉   | 4580/5000 [5:35:56<26:22,  3.77s/it]


訓練次數4580，總回報9.232098765432111


 92%|█████████████████████████████████▉   | 4590/5000 [5:36:37<34:28,  5.05s/it]


訓練次數4590，總回報39.701812688821626


 92%|██████████████████████████████████   | 4600/5000 [5:37:23<30:08,  4.52s/it]


訓練次數4600，總回報38.11832061068693


 92%|██████████████████████████████████   | 4610/5000 [5:38:08<34:30,  5.31s/it]


訓練次數4610，總回報47.27222222222205


 92%|██████████████████████████████████▏  | 4620/5000 [5:38:48<39:50,  6.29s/it]


訓練次數4620，總回報103.22238805970224


 93%|██████████████████████████████████▎  | 4630/5000 [5:39:28<26:53,  4.36s/it]


訓練次數4630，總回報48.20066445182715


 93%|██████████████████████████████████▎  | 4640/5000 [5:40:08<29:08,  4.86s/it]


訓練次數4640，總回報130.36619217081892


 93%|██████████████████████████████████▍  | 4650/5000 [5:40:50<28:58,  4.97s/it]


訓練次數4650，總回報52.35714285714271


 93%|██████████████████████████████████▍  | 4660/5000 [5:41:30<26:39,  4.70s/it]


訓練次數4660，總回報49.58373702422137


 93%|██████████████████████████████████▌  | 4670/5000 [5:42:09<27:16,  4.96s/it]


訓練次數4670，總回報54.46917293233069


 94%|██████████████████████████████████▋  | 4680/5000 [5:42:44<24:20,  4.56s/it]


訓練次數4680，總回報40.49743589743572


 94%|██████████████████████████████████▋  | 4690/5000 [5:43:26<24:30,  4.74s/it]


訓練次數4690，總回報40.84761904761891


 94%|██████████████████████████████████▊  | 4700/5000 [5:44:04<21:27,  4.29s/it]


訓練次數4700，總回報29.7944444444444


 94%|██████████████████████████████████▊  | 4710/5000 [5:44:33<18:42,  3.87s/it]


訓練次數4710，總回報53.992057761732745


 94%|██████████████████████████████████▉  | 4720/5000 [5:45:10<18:28,  3.96s/it]


訓練次數4720，總回報18.289605734767015


 95%|███████████████████████████████████  | 4730/5000 [5:45:37<12:57,  2.88s/it]


訓練次數4730，總回報6.900000000000009


 95%|███████████████████████████████████  | 4740/5000 [5:46:04<16:07,  3.72s/it]


訓練次數4740，總回報26.362264150943368


 95%|███████████████████████████████████▏ | 4750/5000 [5:46:41<16:03,  3.85s/it]


訓練次數4750，總回報17.89999999999999


 95%|███████████████████████████████████▏ | 4760/5000 [5:47:16<19:39,  4.91s/it]


訓練次數4760，總回報50.122923588039754


 95%|███████████████████████████████████▎ | 4770/5000 [5:48:00<21:29,  5.60s/it]


訓練次數4770，總回報37.795081967212944


 96%|███████████████████████████████████▎ | 4780/5000 [5:48:50<21:45,  5.94s/it]


訓練次數4780，總回報35.888311688311475


 96%|███████████████████████████████████▍ | 4790/5000 [5:49:34<16:13,  4.63s/it]


訓練次數4790，總回報48.46896551724127


 96%|███████████████████████████████████▌ | 4800/5000 [5:49:58<09:08,  2.74s/it]


訓練次數4800，總回報4.752688172043024


 96%|███████████████████████████████████▌ | 4810/5000 [5:50:42<13:33,  4.28s/it]


訓練次數4810，總回報12.608280254777085


 96%|███████████████████████████████████▋ | 4820/5000 [5:51:37<15:20,  5.11s/it]


訓練次數4820，總回報46.706270627062565


 97%|███████████████████████████████████▋ | 4830/5000 [5:52:22<14:38,  5.17s/it]


訓練次數4830，總回報48.79090909090889


 97%|███████████████████████████████████▊ | 4840/5000 [5:53:34<23:52,  8.95s/it]


訓練次數4840，總回報81.75767918088788


 97%|███████████████████████████████████▉ | 4850/5000 [5:54:11<14:48,  5.92s/it]


訓練次數4850，總回報45.03333333333319


 97%|███████████████████████████████████▉ | 4860/5000 [5:55:11<17:22,  7.45s/it]


訓練次數4860，總回報51.48181818181804


 97%|████████████████████████████████████ | 4870/5000 [5:56:03<13:47,  6.37s/it]


訓練次數4870，總回報35.326829268292485


 98%|████████████████████████████████████ | 4880/5000 [5:57:02<15:03,  7.53s/it]


訓練次數4880，總回報78.13835616438385


 98%|████████████████████████████████████▏| 4890/5000 [5:57:52<11:10,  6.10s/it]


訓練次數4890，總回報54.129961089493975


 98%|████████████████████████████████████▎| 4900/5000 [5:58:49<12:39,  7.60s/it]


訓練次數4900，總回報37.88918918918895


 98%|████████████████████████████████████▎| 4910/5000 [5:59:42<10:12,  6.80s/it]


訓練次數4910，總回報37.19150326797368


 98%|████████████████████████████████████▍| 4920/5000 [6:00:24<04:42,  3.53s/it]


訓練次數4920，總回報10.86666666666668


 99%|████████████████████████████████████▍| 4930/5000 [6:01:06<06:55,  5.93s/it]


訓練次數4930，總回報7.093442622950844


 99%|████████████████████████████████████▌| 4940/5000 [6:01:45<03:47,  3.80s/it]


訓練次數4940，總回報3.530769230769237


 99%|████████████████████████████████████▋| 4950/5000 [6:02:45<05:47,  6.96s/it]


訓練次數4950，總回報47.09811320754696


 99%|████████████████████████████████████▋| 4960/5000 [6:03:50<04:56,  7.40s/it]


訓練次數4960，總回報37.24820143884871


 99%|████████████████████████████████████▊| 4970/5000 [6:04:31<03:09,  6.33s/it]


訓練次數4970，總回報41.73197026022281


100%|████████████████████████████████████▊| 4980/5000 [6:05:34<03:10,  9.55s/it]


訓練次數4980，總回報38.28831168831151


100%|████████████████████████████████████▉| 4990/5000 [6:06:33<01:18,  7.82s/it]


訓練次數4990，總回報45.202090592334294


100%|█████████████████████████████████████| 5000/5000 [6:08:06<00:00,  4.42s/it]


訓練次數5000，總回報-94.99999999999903





In [16]:
# Agent.Record()