# import 必要套件

In [None]:
import random
import time,math
import numpy as np
import gymnasium as gym
import gymnasium.wrappers as gym_wrap
import matplotlib.pyplot as plt
import matplotlib.animation as animation #輸出動畫影片
from IPython import display
from tqdm import tqdm

In [None]:
import torch
import torch.nn.functional as F
import collections
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
class ImageEnv(gym.Wrapper):
  """Gymnasium wrapper that crops/rescales frames, repeats actions and stacks frames.

  Every raw frame is cropped with ``frame[:72, 12:84]`` and mapped from
  ``[0, 255]`` to ``[-0.5, 0.5]``.  Observations returned by ``reset`` and
  ``step`` are the ``stack_frames`` most recent processed frames stacked along
  axis 0.
  NOTE(review): assumes the wrapped env yields 2-D grayscale frames that are
  at least 72 rows by 84 columns (e.g. 96x96), giving 72x72 crops -- confirm.

  Args:
    env: environment to wrap.
    stack_frames: number of frames kept in the stack; also the number of
      times each action is repeated inside ``step``.
    delay_op: number of steps taken with action 0 at the start of every
      episode before the first observation is returned.
  """

  def __init__(self,env,stack_frames=4,delay_op=50):
    super().__init__(env)
    self.delay_op = delay_op
    self.stack_frames = stack_frames

  @staticmethod
  def _preprocess(frame):
    """Crop a raw frame and rescale its values from [0, 255] to [-0.5, 0.5]."""
    return frame[:72, 12:84]/255.0-0.5

  def reset(self, *, seed=None, options=None):
    """Reset the wrapped env, skip the warm-up steps and build the initial stack.

    Args:
      seed: forwarded to the wrapped environment's ``reset``.
      options: forwarded to the wrapped environment's ``reset``.

    Returns:
      ``(stacked_state, info)`` where ``stacked_state`` is the last warm-up
      frame (or the reset frame when ``delay_op`` is 0) repeated
      ``stack_frames`` times along axis 0.
    """
    s, info = self.env.reset(seed=seed, options=options)
    # Warm-up: step with action 0 (presumably the no-op action -- confirm).
    # Rewards and termination flags of these steps are deliberately ignored.
    for _ in range(self.delay_op):
      s, _, _, _, info = self.env.step(0)
    # Preprocess once, after the loop, so the stack also exists when delay_op == 0.
    self.stacked_state = np.tile(self._preprocess(s), (self.stack_frames,1,1))
    return self.stacked_state, info

  def step(self, action):
    """Repeat ``action`` for up to ``stack_frames`` env steps.

    Rewards are summed over the repeated steps.  Each processed frame is
    pushed onto the stack (dropping the oldest one), including the frame on
    which the episode terminates or is truncated, so the returned
    observation always reflects the latest frame seen.

    Returns:
      ``(stacked_state, reward, terminated, truncated, info)``.
    """
    reward = 0
    for _ in range(self.stack_frames):
      s, r, terminated, truncated, info = self.env.step(action)
      # A single-step reward of exactly -100 is treated as the end of the
      # episode (presumably the environment's out-of-bounds penalty -- confirm).
      if r==-100:
        terminated=True
      reward += r
      frame = self._preprocess(s)
      self.stacked_state = np.concatenate((self.stacked_state[1:], frame[np.newaxis]), axis=0)
      if terminated or truncated:
        break
    return self.stacked_state, reward, terminated, truncated, info

# 建立Replay Buffer類別

In [None]:
class ReplayBuffer:
  """Fixed-capacity circular buffer of (s, a, r, s_, done) transitions.

  Storage is pre-allocated as NumPy arrays; once ``max_size`` transitions
  have been stored, the oldest entries are overwritten.

  Args:
    max_size: maximum number of transitions kept.
    num_steps: stored on the instance as ``self.num_steps``; not used by the
      buffer itself.
    state_shape: shape of a single state array.  Defaults to ``(4, 72, 72)``.
  """

  def __init__(self,max_size=int(1e5), num_steps=1, state_shape=(4,72,72)):
    state_shape = tuple(state_shape)
    self.s = np.zeros((max_size,)+state_shape, dtype=np.float32)
    self.a = np.zeros((max_size,), dtype=np.int64)
    self.r = np.zeros((max_size, 1), dtype=np.float32)
    self.s_ = np.zeros((max_size,)+state_shape, dtype=np.float32)
    self.done = np.zeros((max_size, 1), dtype=np.float32)
    self.ptr = 0    # index of the slot the next transition is written to
    self.size = 0   # number of valid transitions currently stored
    self.max_size = max_size
    self.num_steps = num_steps

  def append(self,s,a,r,s_,done):
    """Store one transition, overwriting the oldest one when the buffer is full."""
    self.s[self.ptr] = s
    self.a[self.ptr] = a
    self.r[self.ptr] = r
    self.s_[self.ptr] = s_
    self.done[self.ptr] = done
    self.ptr = (self.ptr + 1) % self.max_size
    self.size = min(self.size+1,self.max_size)

  def sample(self, batch_size):
    """Draw ``batch_size`` stored transitions uniformly at random, with replacement.

    Returns:
      Tuple ``(s, a, r, s_, done)`` of torch tensors: float32 states of shape
      ``(batch_size, *state_shape)``, int64 actions of shape ``(batch_size,)``
      and float32 rewards / done flags of shape ``(batch_size, 1)``.

    Raises:
      ValueError: if the buffer is empty.
    """
    if self.size == 0:
      raise ValueError("cannot sample from an empty ReplayBuffer")
    ind = np.random.randint(0, self.size, batch_size)
    # Fancy indexing already copies, so from_numpy can wrap the result
    # without a second copy and without touching the buffer storage.
    return (
      torch.from_numpy(self.s[ind]),
      torch.from_numpy(self.a[ind]),
      torch.from_numpy(self.r[ind]),
      torch.from_numpy(self.s_[ind]),
      torch.from_numpy(self.done[ind]),
    )

# 搭建DQN神經網路的類別

In [None]:
# Neural network for the Dueling DQN
class DQN(torch.nn.Module):
  """Dueling Q-network: a shared conv/dense trunk feeding a V(s) head and an A(s,a) head.

  The flatten size of ``32 * 8 * 8`` matches 4-channel inputs whose spatial
  size reduces to 8x8 after the two convolutions, e.g. ``[N, 4, 72, 72]``:
  conv1 gives ``[N, 16, 18, 18]`` and conv2 gives ``[N, 32, 8, 8]``.

  Args:
    n_act: number of discrete actions (width of the advantage head).
  """

  def __init__(self,n_act):
    super().__init__()
    nn = torch.nn
    flat_features = 32 * 8 * 8
    # Shared feature extractor.
    self.conv1 = nn.Conv2d(4, 16, kernel_size=4, stride=4)
    self.conv2 = nn.Conv2d(16, 32, kernel_size=4, stride=2)
    self.fc1 = nn.Linear(flat_features, 256)
    self.fc2 = nn.Linear(256, 64)
    # State-value stream V(s).
    self.value_fc = nn.Linear(64, 16)
    self.value = nn.Linear(16, 1)
    # Action-advantage stream A(s, a).
    self.adv_fc = nn.Linear(64, 32)
    self.adv = nn.Linear(32, n_act)

  def forward(self,x):
    """Return Q-values computed as ``V(s) + (A(s,a) - mean_a A(s,a))``."""
    features = torch.relu(self.conv1(x))
    features = torch.relu(self.conv2(features))
    features = features.view((-1, 32 * 8 * 8))
    hidden = torch.relu(self.fc1(features))
    hidden = torch.relu(self.fc2(hidden))
    state_value = self.value(torch.relu(self.value_fc(hidden)))
    advantage = self.adv(torch.relu(self.adv_fc(hidden)))
    # Centre the advantages so that V and A are identifiable.
    centred_advantage = advantage - advantage.mean(dim=-1, keepdim=True)
    return state_value + centred_advantage

# 設定是否載入模型參數，舊參數檔路徑，新參數檔路徑

In [None]:
# Checkpoint to resume from: 0 starts a fresh run; a positive value N resumes
# from the files "Model-N.pt" and "Log-N.npy".
Load_File = 0
Old_File = f"Model-{Load_File}.pt"
# Training log: either restored from the saved .npy file or started empty.
Log = (
  np.load(f"Log-{Load_File}.npy", allow_pickle=True).item()
  if Load_File > 0
  else {"TrainReward": [], "TestReward": [], "Loss": []}
)

In [None]:
# Build the environment: CarRacing-v3 with a discrete action space and
# rgb_array rendering, converted to grayscale and then wrapped by ImageEnv.
env = ImageEnv(
  gym_wrap.GrayscaleObservation(
    gym.make(
      'CarRacing-v3',
      render_mode="rgb_array",
      domain_randomize=False,
      continuous=False,
    )
  )
)

# 搭建智能體Agent的類別

In [None]:
class DQNAgent():
  """DQN agent with a prediction network, a target network and a replay buffer.

  Relies on the module-level names ``env``, ``device``, ``Load_File``,
  ``Old_File``, ``Log``, ``DQN`` and ``ReplayBuffer``.

  Args:
    gamma: discount factor used in the TD target.
    eps_low: lower bound of the epsilon-greedy exploration rate.
    lr: learning rate of the Adam optimizer.
  """

  def __init__(self,gamma=0.9,eps_low=0.1,lr=0.00025):
    self.env = env
    self.n_act=self.env.action_space.n
    self.PredictDQN= DQN(self.n_act)
    self.TargetDQN= DQN(self.n_act)
    if Load_File>0:
      # Read the checkpoint once; map_location lets weights saved on a GPU
      # be restored on a machine where `device` is the CPU.
      state=torch.load(Old_File,map_location=device)
      self.PredictDQN.load_state_dict(state)
      self.TargetDQN.load_state_dict(state)
    self.PredictDQN.to(device)
    self.TargetDQN.to(device)
    self.LossFun=torch.nn.SmoothL1Loss()
    self.optimizer=torch.optim.Adam(self.PredictDQN.parameters(),lr=lr)
    self.gamma=gamma
    self.eps_low=eps_low
    # Defined here so SelectA is usable before Train runs; Train overwrites
    # it at the start of every episode.
    self.EPS=eps_low
    self.rb=ReplayBuffer(max_size=10000, num_steps=1)

  def PredictA(self,s):
    """Return the greedy action (argmax of the prediction network's output) for state ``s``."""
    with torch.no_grad():
      return torch.argmax(self.PredictDQN(torch.FloatTensor(s).to(device))).item()

  def SelectA(self,a):
    """Epsilon-greedy: with probability ``self.EPS`` return a random action, otherwise ``a``."""
    return self.env.action_space.sample() if np.random.random()<self.EPS else a

  def Train(self,N_EPISODES):
    """Train for episodes ``Load_File`` .. ``N_EPISODES - 1``.

    Epsilon decays exponentially from 1 towards ``eps_low`` with the episode
    index.  After every 10th episode the agent is evaluated with ``Test`` and
    the model weights and the log are saved to ``Model-<n>.pt`` and
    ``Log-<n>.npy``.
    """
    for i in tqdm(range(Load_File,N_EPISODES)):
      self.EPS=self.eps_low+(1-self.eps_low)*math.exp(-i*12/(N_EPISODES))
      # Refresh the target network once at the start of every 20th episode.
      # Doing this inside the step loop would copy the weights on every
      # single step of those episodes, leaving no stable target there.
      if i % 20==0:
        self.TargetDQN.load_state_dict(self.PredictDQN.state_dict())
      total_reward=0
      s,_=self.env.reset()
      while True:
        a=self.SelectA(self.PredictA(s))
        s_,r,done,stop,_=self.env.step(a)
        # Only `done` (terminated) is stored; truncated episodes still bootstrap.
        self.rb.append(s,a,r,s_,done)
        # NOTE(review): this gate uses the episode index `i`, not a step
        # counter; with num_steps == 1 it is always true -- confirm intent.
        if self.rb.size > 200 and i%self.rb.num_steps==0:
          self.Learn()
        s=s_
        total_reward+=r
        if done or stop:
          break
      Log["TrainReward"].append(total_reward)
      if i % 10 == 9:
        test_reward=self.Test()
        print(f"\n訓練次數{i+1}，總回報{test_reward}")
        Log["TestReward"].append(test_reward)
        torch.save(self.PredictDQN.state_dict(), f"Model-{i+1}.pt")
        np.save(f"Log-{i+1}.npy", Log)

  def Learn(self):
    """Run one optimisation step on a batch of 32 transitions from the replay buffer."""
    self.optimizer.zero_grad()
    batch_s, batch_a, batch_r, batch_s_, batch_done=self.rb.sample(32)
    # Q(s, a) of the actions actually taken, selected through a one-hot mask.
    action_mask=F.one_hot(batch_a.long().to(device),self.n_act)
    predict_Q = (self.PredictDQN(batch_s.to(device))*action_mask).sum(1,keepdims=True)
    with torch.no_grad():
      # TD target r + gamma * max_a' Q_target(s', a'), with the bootstrap
      # term zeroed for terminal transitions.
      next_Q=self.TargetDQN(batch_s_.to(device)).max(1,keepdims=True)[0]
      target_Q = batch_r.to(device)+(1-batch_done.to(device))*self.gamma*next_Q
    loss = self.LossFun(predict_Q, target_Q)
    Log["Loss"].append(float(loss))
    loss.backward()
    self.optimizer.step()

  def Test(self,VIDEO=False):
    """Play one greedy episode and return its total reward.

    Args:
      VIDEO: when true, every step is rendered and the frames are saved as
        the animation ``Car_racing.mp4``.
    """
    total_reward=0
    video=[]
    s,_=self.env.reset()
    while True:
      # Rendering and keeping frames is only needed when a video is requested.
      if VIDEO:
        video.append(self.env.render())
      a=self.PredictA(s)
      s,r,done,stop,_=self.env.step(a)
      total_reward+=r
      if done or stop:
        break
    if VIDEO:
      patch = plt.imshow(video[0]) # image object that displays the frames
      plt.axis('off') # hide the axes
      def animate(i): # callback that swaps in frame i
        patch.set_data(video[i])
      # plt.gcf(): current figure; animate: frame-update callback;
      # frames: number of frames; interval: delay between frames in ms
      anim = animation.FuncAnimation(plt.gcf(),animate,frames=len(video),interval=200)
      anim.save('Car_racing.mp4') # save as an mp4 file
    return total_reward

  def Record(self):
    """Play one greedy episode, displaying each rendered frame and printing the rewards."""
    total_reward=0
    s,_=self.env.reset()
    while True:
      image=self.env.render()
      plt.imshow(image)
      a=self.PredictA(s)
      s,r,done,stop,_=self.env.step(a)
      print(r)
      total_reward+=r
      plt.pause(0.1)
      # clear the current notebook output before the next frame is drawn
      display.clear_output(wait=True)
      if done or stop:
        break
    print(total_reward)

In [None]:
# Create the agent (discount 0.95, exploration floor 0.05, learning rate
# 2.5e-4) and train it up to episode 5000.
Agent = DQNAgent(
  lr=0.00025,
  eps_low=0.05,
  gamma=0.95,
)
Agent.Train(5000)

  0%|                                      | 10/5000 [02:08<18:55:47, 13.66s/it]


訓練次數10，總回報10.225641025641039


  0%|▏                                     | 20/5000 [04:17<20:59:19, 15.17s/it]


訓練次數20，總回報-94.99999999999893


  1%|▏                                     | 30/5000 [06:04<15:03:14, 10.90s/it]


訓練次數30，總回報17.215384615384608


  1%|▎                                     | 40/5000 [08:09<22:16:41, 16.17s/it]


訓練次數40，總回報-91.85534591194882


  1%|▍                                     | 50/5000 [10:12<20:04:26, 14.60s/it]


訓練次數50，總回報-94.99999999999898


  1%|▍                                     | 60/5000 [12:22<19:06:03, 13.92s/it]


訓練次數60，總回報38.733887043189306


  1%|▌                                     | 70/5000 [14:16<21:16:24, 15.53s/it]


訓練次數70，總回報-94.99999999999902


  2%|▌                                     | 80/5000 [16:29<23:18:22, 17.05s/it]


訓練次數80，總回報-94.99999999999895


  2%|▋                                     | 90/5000 [18:25<23:24:51, 17.17s/it]


訓練次數90，總回報-94.99999999999922


  2%|▋                                    | 100/5000 [20:19<17:14:31, 12.67s/it]


訓練次數100，總回報13.336842105263168


  2%|▊                                    | 110/5000 [22:25<24:53:24, 18.32s/it]


訓練次數110，總回報-77.75862068965509


  2%|▉                                    | 120/5000 [23:57<17:45:49, 13.10s/it]


訓練次數120，總回報-84.89898989898944


  3%|▉                                     | 130/5000 [25:14<9:55:06,  7.33s/it]


訓練次數130，總回報14.801413427561842


  3%|█                                     | 140/5000 [26:31<9:53:38,  7.33s/it]


訓練次數140，總回報11.521602787456455


  3%|█▏                                    | 150/5000 [27:38<7:38:12,  5.67s/it]


訓練次數150，總回報9.929032258064534


  3%|█▏                                   | 160/5000 [29:02<13:18:23,  9.90s/it]


訓練次數160，總回報12.954838709677434


  3%|█▎                                    | 170/5000 [29:59<7:31:34,  5.61s/it]


訓練次數170，總回報38.07692307692302


  4%|█▎                                    | 180/5000 [30:53<8:56:17,  6.68s/it]


訓練次數180，總回報41.94460966542744


  4%|█▍                                    | 190/5000 [31:45<8:49:38,  6.61s/it]


訓練次數190，總回報13.708163265306133


  4%|█▌                                    | 200/5000 [33:09<9:24:09,  7.05s/it]


訓練次數200，總回報41.71690140845065


  4%|█▌                                    | 210/5000 [34:08<7:58:51,  6.00s/it]


訓練次數210，總回報25.907092198581516


  4%|█▋                                   | 220/5000 [35:21<10:29:22,  7.90s/it]


訓練次數220，總回報16.104832713754657


  5%|█▋                                    | 230/5000 [36:32<8:06:30,  6.12s/it]


訓練次數230，總回報41.23157894736836


  5%|█▊                                   | 240/5000 [37:45<14:39:44, 11.09s/it]


訓練次數240，總回報-94.999999999999


  5%|█▉                                    | 250/5000 [38:37<7:13:15,  5.47s/it]


訓練次數250，總回報41.231578947368355


  5%|█▉                                    | 260/5000 [39:36<7:04:12,  5.37s/it]


訓練次數260，總回報58.37037037037027


  5%|██                                    | 270/5000 [40:24<6:49:41,  5.20s/it]


訓練次數270，總回報19.215094339622638


  6%|██                                   | 280/5000 [41:55<14:49:53, 11.31s/it]


訓練次數280，總回報-94.99999999999909


  6%|██▏                                   | 290/5000 [42:39<6:12:14,  4.74s/it]


訓練次數290，總回報46.64767025089599


  6%|██▎                                   | 300/5000 [43:36<7:48:12,  5.98s/it]


訓練次數300，總回報56.47499999999988


  6%|██▎                                   | 310/5000 [44:41<9:31:14,  7.31s/it]


訓練次數310，總回報88.2500000000001


  6%|██▎                                  | 320/5000 [45:47<10:17:58,  7.92s/it]


訓練次數320，總回報35.40163934426225


  7%|██▍                                  | 330/5000 [47:22<14:25:19, 11.12s/it]


訓練次數330，總回報247.67553956834664


  7%|██▌                                  | 340/5000 [48:48<14:19:29, 11.07s/it]


訓練次數340，總回報112.43649635036519


  7%|██▌                                  | 350/5000 [50:44<13:46:47, 10.67s/it]


訓練次數350，總回報27.65124653739607


  7%|██▋                                  | 360/5000 [52:33<14:49:16, 11.50s/it]


訓練次數360，總回報53.56969696969684


  7%|██▋                                  | 370/5000 [53:56<12:42:17,  9.88s/it]


訓練次數370，總回報33.831921824104136


  8%|██▊                                  | 380/5000 [55:06<10:02:26,  7.82s/it]


訓練次數380，總回報37.864705882352844


  8%|██▉                                   | 390/5000 [55:56<7:39:49,  5.98s/it]


訓練次數390，總回報68.71929824561404


  8%|██▉                                  | 400/5000 [57:09<13:42:50, 10.73s/it]


訓練次數400，總回報19.16377708978326


  8%|███                                   | 410/5000 [58:05<7:43:02,  6.05s/it]


訓練次數410，總回報68.77096774193546


  8%|███▏                                  | 420/5000 [59:04<9:32:17,  7.50s/it]


訓練次數420，總回報47.923529411764655


  9%|███▎                                  | 430/5000 [59:57<7:07:18,  5.61s/it]


訓練次數430，總回報80.34664536741221


  9%|███                                | 440/5000 [1:01:03<10:20:30,  8.16s/it]


訓練次數440，總回報86.91841004184104


  9%|███▏                               | 450/5000 [1:02:33<15:21:33, 12.15s/it]


訓練次數450，總回報121.72209737827751


  9%|███▏                               | 460/5000 [1:03:45<12:26:56,  9.87s/it]


訓練次數460，總回報195.30864197530963


  9%|███▍                                | 470/5000 [1:04:55<9:18:26,  7.40s/it]


訓練次數470，總回報75.50100334448166


 10%|███▍                                | 480/5000 [1:06:04<9:23:30,  7.48s/it]


訓練次數480，總回報67.48608058608059


 10%|███▌                                | 490/5000 [1:07:02<6:53:48,  5.51s/it]


訓練次數490，總回報64.86610169491516


 10%|███▌                                | 500/5000 [1:08:10<7:32:46,  6.04s/it]


訓練次數500，總回報48.53189964157697


 10%|███▌                               | 510/5000 [1:09:27<10:23:36,  8.33s/it]


訓練次數510，總回報209.20649350649478


 10%|███▋                               | 520/5000 [1:11:05<12:37:05, 10.14s/it]


訓練次數520，總回報45.19139072847674


 11%|███▋                               | 530/5000 [1:12:27<14:44:54, 11.88s/it]


訓練次數530，總回報140.8440771349868


 11%|███▊                               | 540/5000 [1:13:50<11:33:08,  9.32s/it]


訓練次數540，總回報75.22500000000002


 11%|███▊                               | 550/5000 [1:15:17<13:39:55, 11.06s/it]


訓練次數550，總回報180.2464788732403


 11%|███▉                               | 560/5000 [1:16:28<12:48:00, 10.38s/it]


訓練次數560，總回報91.73455149501676


 11%|███▉                               | 570/5000 [1:17:53<11:50:54,  9.63s/it]


訓練次數570，總回報73.44259927797835


 12%|████                               | 580/5000 [1:19:05<10:35:57,  8.63s/it]


訓練次數580，總回報76.32807017543865


 12%|████▏                              | 590/5000 [1:20:38<14:55:36, 12.19s/it]


訓練次數590，總回報67.05168539325837


 12%|████▏                              | 600/5000 [1:21:57<10:15:07,  8.39s/it]


訓練次數600，總回報331.14829931972764


 12%|████▍                               | 610/5000 [1:23:12<9:03:06,  7.42s/it]


訓練次數610，總回報126.82014652014689


 12%|████▍                               | 620/5000 [1:24:21<9:52:55,  8.12s/it]


訓練次數620，總回報112.33448275862101


 13%|████▌                               | 630/5000 [1:25:25<7:36:04,  6.26s/it]


訓練次數630，總回報111.23478260869607


 13%|████▍                              | 640/5000 [1:26:41<10:18:57,  8.52s/it]


訓練次數640，總回報322.0262458471759


 13%|████▌                              | 650/5000 [1:28:12<14:02:42, 11.62s/it]


訓練次數650，總回報277.3142857142865


 13%|████▌                              | 660/5000 [1:29:21<10:20:15,  8.57s/it]


訓練次數660，總回報293.4352517985616


 13%|████▊                               | 670/5000 [1:30:46<9:28:49,  7.88s/it]


訓練次數670，總回報195.05035460993022


 14%|████▊                              | 680/5000 [1:32:29<12:02:52, 10.04s/it]


訓練次數680，總回報295.23333333333403


 14%|████▊                              | 690/5000 [1:33:47<10:41:14,  8.93s/it]


訓練次數690，總回報285.3999999999999


 14%|█████                               | 700/5000 [1:35:00<9:19:14,  7.80s/it]


訓練次數700，總回報300.4697594501718


 14%|████▉                              | 710/5000 [1:36:30<13:41:05, 11.48s/it]


訓練次數710，總回報201.71265822784918


 14%|█████                              | 720/5000 [1:37:44<10:57:07,  9.21s/it]


訓練次數720，總回報92.95492957746491


 15%|█████                              | 730/5000 [1:39:39<17:44:38, 14.96s/it]


訓練次數730，總回報316.50336700336675


 15%|█████▎                              | 740/5000 [1:40:56<9:22:34,  7.92s/it]


訓練次數740，總回報193.2762237762246


 15%|█████▎                             | 750/5000 [1:42:31<13:26:31, 11.39s/it]


訓練次數750，總回報225.58808777429607


 15%|█████▎                             | 760/5000 [1:44:15<11:52:20, 10.08s/it]


訓練次數760，總回報118.26514657980496


 15%|█████▌                              | 770/5000 [1:45:41<8:49:55,  7.52s/it]


訓練次數770，總回報138.6000000000004


 16%|█████▍                             | 780/5000 [1:46:57<12:06:26, 10.33s/it]


訓練次數780，總回報352.1652329749096


 16%|█████▌                             | 790/5000 [1:48:30<13:20:51, 11.41s/it]


訓練次數790，總回報341.2613718411542


 16%|█████▌                             | 800/5000 [1:49:57<11:20:51,  9.73s/it]


訓練次數800，總回報109.85419847328258


 16%|█████▋                             | 810/5000 [1:51:32<14:27:55, 12.43s/it]


訓練次數810，總回報369.7960264900655


 16%|█████▋                             | 820/5000 [1:53:26<15:44:26, 13.56s/it]


訓練次數820，總回報367.29090909090866


 17%|█████▉                              | 830/5000 [1:54:21<5:55:42,  5.12s/it]


訓練次數830，總回報35.6116279069767


 17%|██████                              | 840/5000 [1:55:13<5:49:01,  5.03s/it]


訓練次數840，總回報40.347457627118565


 17%|██████                              | 850/5000 [1:56:25<9:15:05,  8.03s/it]


訓練次數850，總回報167.70041152263423


 17%|██████                             | 860/5000 [1:57:47<11:06:30,  9.66s/it]


訓練次數860，總回報313.92972972972984


 17%|██████                             | 870/5000 [1:59:22<18:55:08, 16.49s/it]


訓練次數870，總回報715.5477508650463


 18%|██████▏                            | 880/5000 [2:01:09<15:14:32, 13.32s/it]


訓練次數880，總回報95.8036253776437


 18%|██████▏                            | 890/5000 [2:02:27<10:32:27,  9.23s/it]


訓練次數890，總回報317.25689045936406


 18%|██████▎                            | 900/5000 [2:04:00<10:00:25,  8.79s/it]


訓練次數900，總回報302.7226148409897


 18%|██████▌                             | 910/5000 [2:05:19<9:02:35,  7.96s/it]


訓練次數910，總回報210.0105960264907


 18%|██████▌                             | 920/5000 [2:06:15<6:55:46,  6.11s/it]


訓練次數920，總回報48.37067137809178


 19%|██████▌                            | 930/5000 [2:08:02<14:56:18, 13.21s/it]


訓練次數930，總回報270.84827586206904


 19%|██████▌                            | 940/5000 [2:09:48<14:48:20, 13.13s/it]


訓練次數940，總回報311.61460674157263


 19%|██████▋                            | 950/5000 [2:11:06<10:56:37,  9.73s/it]


訓練次數950，總回報369.9909090909087


 19%|██████▋                            | 960/5000 [2:12:32<11:26:35, 10.20s/it]


訓練次數960，總回報453.7988593155873


 19%|██████▉                             | 970/5000 [2:13:57<9:32:53,  8.53s/it]


訓練次數970，總回報255.09528619528749


 20%|██████▊                            | 980/5000 [2:15:52<14:13:55, 12.75s/it]


訓練次數980，總回報106.13943661971847


 20%|███████▏                            | 990/5000 [2:17:22<9:06:08,  8.17s/it]


訓練次數990，總回報101.74110032362486


 20%|██████▊                           | 1000/5000 [2:19:31<14:16:32, 12.85s/it]


訓練次數1000，總回報320.87936507936496


 20%|██████▊                           | 1010/5000 [2:20:55<12:25:16, 11.21s/it]


訓練次數1010，總回報334.1216216216215


 20%|██████▉                           | 1020/5000 [2:22:29<11:29:53, 10.40s/it]


訓練次數1020，總回報416.2490706319686


 21%|███████                           | 1030/5000 [2:24:32<15:57:02, 14.46s/it]


訓練次數1030，總回報330.0033670033665


 21%|███████                           | 1040/5000 [2:26:49<15:59:08, 14.53s/it]


訓練次數1040，總回報446.41057401812134


 21%|███████▎                           | 1050/5000 [2:28:17<7:49:44,  7.14s/it]


訓練次數1050，總回報45.25555555555548


 21%|███████▏                          | 1060/5000 [2:29:23<10:23:06,  9.49s/it]


訓練次數1060，總回報343.57777777777693


 21%|███████▎                          | 1070/5000 [2:31:41<16:59:19, 15.56s/it]


訓練次數1070，總回報376.55714285714134


 22%|███████▎                          | 1080/5000 [2:33:14<14:12:06, 13.04s/it]


訓練次數1080，總回報342.7113924050626


 22%|███████▍                          | 1090/5000 [2:34:54<13:18:39, 12.26s/it]


訓練次數1090，總回報660.9309859154863


 22%|███████▍                          | 1100/5000 [2:36:42<14:39:45, 13.53s/it]


訓練次數1100，總回報291.6688524590169


 22%|███████▌                          | 1110/5000 [2:38:01<11:22:59, 10.53s/it]


訓練次數1110，總回報221.0474576271194


 22%|███████▌                          | 1120/5000 [2:39:32<11:35:19, 10.75s/it]


訓練次數1120，總回報105.93636363636386


 23%|███████▋                          | 1130/5000 [2:41:09<11:50:50, 11.02s/it]


訓練次數1130，總回報500.74525547445086


 23%|███████▊                          | 1140/5000 [2:43:16<15:17:12, 14.26s/it]


訓練次數1140，總回報261.69748427673045


 23%|███████▊                          | 1150/5000 [2:44:42<11:37:00, 10.86s/it]


訓練次數1150，總回報295.56085409252705


 23%|███████▉                          | 1160/5000 [2:46:08<13:33:56, 12.72s/it]


訓練次數1160，總回報466.69402985074277


 23%|███████▉                          | 1170/5000 [2:48:05<11:58:04, 11.25s/it]


訓練次數1170，總回報327.6485049833884


 24%|████████                          | 1180/5000 [2:49:37<12:07:18, 11.42s/it]


訓練次數1180，總回報324.3111111111113


 24%|████████                          | 1190/5000 [2:51:23<13:28:29, 12.73s/it]


訓練次數1190，總回報405.4059347180995


 24%|████████▏                         | 1200/5000 [2:52:49<10:35:58, 10.04s/it]


訓練次數1200，總回報316.3423728813557


 24%|████████▍                          | 1210/5000 [2:54:09<9:49:47,  9.34s/it]


訓練次數1210，總回報162.26775244299736


 24%|████████▎                         | 1220/5000 [2:55:16<10:15:59,  9.78s/it]


訓練次數1220，總回報410.01290322580473


 25%|████████▎                         | 1230/5000 [2:56:48<10:37:24, 10.14s/it]


訓練次數1230，總回報379.4668789808893


 25%|████████▍                         | 1240/5000 [2:58:21<12:05:37, 11.58s/it]


訓練次數1240，總回報233.7623145400605


 25%|████████▌                         | 1250/5000 [2:59:58<10:07:25,  9.72s/it]


訓練次數1250，總回報468.2713310580158


 25%|████████▌                         | 1260/5000 [3:02:09<12:47:58, 12.32s/it]


訓練次數1260，總回報883.9905660377293


 25%|████████▋                         | 1270/5000 [3:03:31<11:12:33, 10.82s/it]


訓練次數1270，總回報533.296774193546


 26%|████████▋                         | 1280/5000 [3:05:21<13:56:55, 13.50s/it]


訓練次數1280，總回報328.4024390243906


 26%|████████▊                         | 1290/5000 [3:06:59<13:10:50, 12.79s/it]


訓練次數1290，總回報307.30259740259805


 26%|█████████                          | 1300/5000 [3:08:22<9:25:55,  9.18s/it]


訓練次數1300，總回報269.2169014084519


 26%|████████▉                         | 1310/5000 [3:09:52<11:55:45, 11.64s/it]


訓練次數1310，總回報550.3234875444805


 26%|████████▉                         | 1320/5000 [3:11:25<10:27:03, 10.22s/it]


訓練次數1320，總回報140.35925925925966


 27%|█████████▎                         | 1330/5000 [3:12:49<9:48:06,  9.61s/it]


訓練次數1330，總回報247.47383177570217


 27%|█████████                         | 1340/5000 [3:14:39<11:38:01, 11.44s/it]


訓練次數1340，總回報405.0444444444429


 27%|█████████▏                        | 1350/5000 [3:16:19<13:13:24, 13.04s/it]


訓練次數1350，總回報504.8071146245043


 27%|█████████▏                        | 1360/5000 [3:18:10<12:21:58, 12.23s/it]


訓練次數1360，總回報425.61627906976696


 27%|█████████▎                        | 1370/5000 [3:19:45<10:58:13, 10.88s/it]


訓練次數1370，總回報356.4702702702698


 28%|█████████▍                        | 1380/5000 [3:21:32<11:54:59, 11.85s/it]


訓練次數1380，總回報678.9273037542561


 28%|█████████▍                        | 1390/5000 [3:23:09<11:38:00, 11.60s/it]


訓練次數1390，總回報429.4230215827312


 28%|█████████▌                        | 1400/5000 [3:25:13<12:55:10, 12.92s/it]


訓練次數1400，總回報409.77586206896336


 28%|█████████▌                        | 1410/5000 [3:26:56<13:18:08, 13.34s/it]


訓練次數1410，總回報878.6909090908989


 28%|█████████▉                         | 1420/5000 [3:28:16<7:47:23,  7.83s/it]


訓練次數1420，總回報247.8878048780503


 29%|██████████                         | 1430/5000 [3:29:35<7:51:20,  7.92s/it]


訓練次數1430，總回報281.2684931506851


 29%|██████████                         | 1440/5000 [3:31:05<9:48:45,  9.92s/it]


訓練次數1440，總回報420.1681159420279


 29%|██████████▏                        | 1450/5000 [3:32:25<7:21:21,  7.46s/it]


訓練次數1450，總回報67.32238267148011


 29%|██████████▏                        | 1460/5000 [3:33:54<9:24:59,  9.58s/it]


訓練次數1460，總回報289.9197231833912


 29%|██████████▎                        | 1470/5000 [3:35:01<7:14:31,  7.39s/it]


訓練次數1470，總回報154.14887459807105


 30%|██████████▎                        | 1480/5000 [3:36:17<9:22:06,  9.58s/it]


訓練次數1480，總回報259.0076923076928


 30%|██████████▍                        | 1490/5000 [3:37:38<9:21:57,  9.61s/it]


訓練次數1490，總回報288.8547703180215


 30%|██████████▏                       | 1500/5000 [3:39:17<10:33:36, 10.86s/it]


訓練次數1500，總回報612.4571428571367


 30%|██████████▎                       | 1510/5000 [3:40:54<12:08:11, 12.52s/it]


訓練次數1510，總回報609.633576642331


 30%|██████████▎                       | 1520/5000 [3:42:44<13:04:49, 13.53s/it]


訓練次數1520，總回報384.0576642335765


 31%|██████████▍                       | 1530/5000 [3:44:36<13:54:52, 14.44s/it]


訓練次數1530，總回報740.6428571428482


 31%|██████████▍                       | 1540/5000 [3:46:06<11:18:00, 11.76s/it]


訓練次數1540，總回報437.2764331210151


 31%|██████████▌                       | 1550/5000 [3:48:15<14:51:47, 15.51s/it]


訓練次數1550，總回報121.99422382671501


 31%|██████████▌                       | 1560/5000 [3:50:09<13:25:03, 14.04s/it]


訓練次數1560，總回報255.93559322033994


 31%|██████████▋                       | 1570/5000 [3:52:06<10:23:04, 10.90s/it]


訓練次數1570，總回報311.4444444444441


 32%|██████████▋                       | 1580/5000 [3:53:54<12:55:39, 13.61s/it]


訓練次數1580，總回報372.72499999999866


 32%|██████████▊                       | 1590/5000 [3:55:59<12:58:58, 13.71s/it]


訓練次數1590，總回報910.3313588850059


 32%|███████████▏                       | 1600/5000 [3:57:33<9:25:25,  9.98s/it]


訓練次數1600，總回報472.7825174825164


 32%|███████████▎                       | 1610/5000 [3:58:50<8:13:39,  8.74s/it]


訓練次數1610，總回報165.78918918918964


 32%|███████████                       | 1620/5000 [4:00:40<11:33:17, 12.31s/it]


訓練次數1620，總回報303.47909407665486


 33%|███████████▍                       | 1630/5000 [4:01:55<7:46:16,  8.30s/it]


訓練次數1630，總回報154.98338557993796


 33%|███████████▏                      | 1640/5000 [4:03:31<10:13:40, 10.96s/it]


訓練次數1640，總回報646.7221453287171


 33%|███████████▌                       | 1650/5000 [4:05:10<9:36:45, 10.33s/it]


訓練次數1650，總回報62.68784194528864


 33%|███████████▎                      | 1660/5000 [4:07:00<11:21:12, 12.24s/it]


訓練次數1660，總回報240.54183006536059


 33%|███████████▎                      | 1670/5000 [4:08:42<11:04:33, 11.97s/it]


訓練次數1670，總回報284.77692307692337


 34%|███████████▍                      | 1680/5000 [4:10:22<10:54:15, 11.82s/it]


訓練次數1680，總回報387.00842490842393


 34%|███████████▊                       | 1690/5000 [4:11:33<8:42:52,  9.48s/it]


訓練次數1690，總回報346.27878787878626


 34%|███████████▉                       | 1700/5000 [4:12:48<9:32:06, 10.40s/it]


訓練次數1700，總回報87.53972602739736


 34%|███████████▉                       | 1710/5000 [4:14:18<8:28:18,  9.27s/it]


訓練次數1710，總回報93.01501706484659


 34%|███████████▋                      | 1720/5000 [4:16:11<12:44:28, 13.98s/it]


訓練次數1720，總回報696.8312101910717


 35%|███████████▊                      | 1730/5000 [4:18:06<13:42:50, 15.10s/it]


訓練次數1730，總回報359.29999999999905


 35%|████████████▏                      | 1740/5000 [4:19:46<9:37:20, 10.63s/it]


訓練次數1740，總回報433.3525547445248


 35%|████████████▎                      | 1750/5000 [4:21:17<9:28:53, 10.50s/it]


訓練次數1750，總回報82.06097560975613


 35%|███████████▉                      | 1760/5000 [4:23:20<12:17:13, 13.65s/it]


訓練次數1760，總回報581.5103896103849


 35%|████████████                      | 1770/5000 [4:25:12<13:15:53, 14.78s/it]


訓練次數1770，總回報398.97796610169314


 36%|████████████                      | 1780/5000 [4:26:40<10:33:14, 11.80s/it]


訓練次數1780，總回報382.38581560283603


 36%|████████████▏                     | 1790/5000 [4:28:19<13:30:33, 15.15s/it]


訓練次數1790，總回報500.87051671732144


 36%|████████████▏                     | 1800/5000 [4:30:02<10:31:19, 11.84s/it]


訓練次數1800，總回報433.3521126760554


 36%|████████████▋                      | 1810/5000 [4:31:20<7:41:51,  8.69s/it]


訓練次數1810，總回報406.4228070175409


 36%|████████████▋                      | 1820/5000 [4:32:43<8:10:32,  9.26s/it]


訓練次數1820，總回報225.6803921568631


 37%|████████████▍                     | 1830/5000 [4:35:16<18:10:36, 20.64s/it]


訓練次數1830，總回報762.5157894736736


 37%|████████████▌                     | 1840/5000 [4:37:09<12:12:23, 13.91s/it]


訓練次數1840，總回報710.7746835442971


 37%|████████████▉                      | 1850/5000 [4:38:41<7:24:24,  8.46s/it]


訓練次數1850，總回報450.1326018808758


 37%|█████████████                      | 1860/5000 [4:39:40<6:07:51,  7.03s/it]


訓練次數1860，總回報436.5996563573867


 37%|█████████████                      | 1870/5000 [4:40:19<4:13:54,  4.87s/it]


訓練次數1870，總回報153.07692307692335


 38%|█████████████▏                     | 1880/5000 [4:41:13<5:37:19,  6.49s/it]


訓練次數1880，總回報44.021768707482934


 38%|████████████▊                     | 1890/5000 [4:43:15<11:00:41, 12.75s/it]


訓練次數1890，總回報915.1699186991827


 38%|████████████▉                     | 1900/5000 [4:45:18<12:02:31, 13.98s/it]


訓練次數1900，總回報717.2457516339772


 38%|████████████▉                     | 1910/5000 [4:47:20<14:46:12, 17.21s/it]


訓練次數1910，總回報682.7777777777656


 38%|█████████████                     | 1920/5000 [4:49:29<13:27:17, 15.73s/it]


訓練次數1920，總回報884.5107023411222


 39%|█████████████▌                     | 1930/5000 [4:50:50<7:51:36,  9.22s/it]


訓練次數1930，總回報321.97435897435923


 39%|█████████████▏                    | 1940/5000 [4:53:05<13:57:58, 16.43s/it]


訓練次數1940，總回報918.7925925925815


 39%|█████████████▋                     | 1950/5000 [4:54:15<6:13:14,  7.34s/it]


訓練次數1950，總回報114.74285714285732


 39%|█████████████▋                     | 1960/5000 [4:55:49<9:04:16, 10.74s/it]


訓練次數1960，總回報387.4674267100967


 39%|█████████████▍                    | 1970/5000 [4:57:42<11:28:18, 13.63s/it]


訓練次數1970，總回報412.7518518518497


 40%|█████████████▍                    | 1980/5000 [4:59:31<10:28:49, 12.49s/it]


訓練次數1980，總回報521.5313725490149


 40%|█████████████▌                    | 1990/5000 [5:01:04<11:09:24, 13.34s/it]


訓練次數1990，總回報739.4555555555457


 40%|██████████████                     | 2000/5000 [5:02:39<8:36:14, 10.32s/it]


訓練次數2000，總回報196.150819672132


 40%|██████████████                     | 2010/5000 [5:04:31<9:51:51, 11.88s/it]


訓練次數2010，總回報132.36122448979614


 40%|█████████████▋                    | 2020/5000 [5:06:41<12:42:57, 15.36s/it]


訓練次數2020，總回報385.70654205607343


 41%|██████████████▏                    | 2030/5000 [5:08:12<9:14:21, 11.20s/it]


訓練次數2030，總回報319.30992907801453


 41%|██████████████▎                    | 2040/5000 [5:10:13<9:06:29, 11.08s/it]


訓練次數2040，總回報295.36712328767123


 41%|█████████████▉                    | 2050/5000 [5:11:56<10:57:44, 13.38s/it]


訓練次數2050，總回報913.7506849314924


 41%|██████████████                    | 2060/5000 [5:13:40<10:18:59, 12.63s/it]


訓練次數2060，總回報407.2301886792419


 41%|██████████████                    | 2070/5000 [5:15:37<12:13:34, 15.02s/it]


訓練次數2070，總回報379.32186495176643


 42%|██████████████▏                   | 2080/5000 [5:17:34<13:33:12, 16.71s/it]


訓練次數2080，總回報630.7632107023348


 42%|██████████████▏                   | 2090/5000 [5:19:44<14:08:14, 17.49s/it]


訓練次數2090，總回報909.4740614334308


 42%|██████████████▎                   | 2100/5000 [5:21:46<11:43:25, 14.55s/it]


訓練次數2100，總回報293.3473684210524


 42%|██████████████▊                    | 2110/5000 [5:23:25<9:59:10, 12.44s/it]


訓練次數2110，總回報332.3749196141471


 42%|██████████████▍                   | 2120/5000 [5:25:15<10:44:45, 13.43s/it]


訓練次數2120，總回報760.7603174603126


 43%|██████████████▍                   | 2130/5000 [5:27:14<12:49:38, 16.09s/it]


訓練次數2130，總回報919.5371647509534


 43%|██████████████▉                    | 2140/5000 [5:28:51<9:49:00, 12.36s/it]


訓練次數2140，總回報678.0073800737968


 43%|██████████████▌                   | 2150/5000 [5:31:12<14:10:39, 17.91s/it]


訓練次數2150，總回報875.565779467668


 43%|███████████████                    | 2160/5000 [5:32:42<9:15:05, 11.73s/it]


訓練次數2160，總回報591.348109965631


 43%|███████████████▏                   | 2170/5000 [5:34:27<9:20:14, 11.88s/it]


訓練次數2170，總回報920.2007299269945


 44%|███████████████▎                   | 2180/5000 [5:36:05<8:10:29, 10.44s/it]


訓練次數2180，總回報744.9553956834444


 44%|██████████████▉                   | 2190/5000 [5:38:23<12:31:41, 16.05s/it]


訓練次數2190，總回報348.90909090908906


 44%|██████████████▉                   | 2200/5000 [5:40:34<13:12:36, 16.98s/it]


訓練次數2200，總回報909.8824561403363


 44%|███████████████                   | 2210/5000 [5:42:26<10:10:45, 13.13s/it]


訓練次數2210，總回報488.9168284789607


 44%|███████████████                   | 2220/5000 [5:44:20<11:36:20, 15.03s/it]


訓練次數2220，總回報800.3555555555442


 45%|███████████████▏                  | 2230/5000 [5:46:52<14:04:24, 18.29s/it]


訓練次數2230，總回報711.0344370860889


 45%|███████████████▏                  | 2240/5000 [5:48:38<10:08:14, 13.22s/it]


訓練次數2240，總回報295.08971061093234


 45%|███████████████▊                   | 2250/5000 [5:50:33<8:58:29, 11.75s/it]


訓練次數2250，總回報368.842857142856


 45%|███████████████▎                  | 2260/5000 [5:52:37<12:37:06, 16.58s/it]


訓練次數2260，總回報795.9723183390945


 45%|███████████████▍                  | 2270/5000 [5:54:46<12:13:41, 16.12s/it]


訓練次數2270，總回報907.6333333333254


 46%|███████████████▌                  | 2280/5000 [5:56:43<10:59:14, 14.54s/it]


訓練次數2280，總回報701.7999999999926


 46%|███████████████▌                  | 2290/5000 [5:58:47<12:45:59, 16.96s/it]


訓練次數2290，總回報637.1606557376995


 46%|████████████████                   | 2300/5000 [6:00:50<9:53:17, 13.18s/it]


訓練次數2300，總回報143.2903225806455


 46%|███████████████▋                  | 2310/5000 [6:02:46<11:05:52, 14.85s/it]


訓練次數2310，總回報666.0416938110666


 46%|███████████████▊                  | 2320/5000 [6:04:34<10:35:23, 14.23s/it]


訓練次數2320，總回報886.8699633699558


 47%|████████████████▎                  | 2330/5000 [6:06:07<8:38:49, 11.66s/it]


訓練次數2330，總回報778.4573426573348


 47%|███████████████▉                  | 2340/5000 [6:08:02<10:25:36, 14.11s/it]


訓練次數2340，總回報907.4426229508089


 47%|███████████████▉                  | 2350/5000 [6:10:11<12:31:49, 17.02s/it]


訓練次數2350，總回報911.5315412186281


 47%|████████████████                  | 2360/5000 [6:11:59<10:10:25, 13.87s/it]


訓練次數2360，總回報440.03054662379145


 47%|████████████████                  | 2370/5000 [6:14:05<11:30:16, 15.75s/it]


訓練次數2370，總回報266.6778546712808


 48%|████████████████▋                  | 2380/5000 [6:16:03<9:30:07, 13.06s/it]


訓練次數2380，總回報521.2285714285673


 48%|████████████████▋                  | 2390/5000 [6:17:41<7:55:28, 10.93s/it]


訓練次數2390，總回報496.0917525773168


 48%|████████████████▎                 | 2400/5000 [6:19:35<10:06:20, 13.99s/it]


訓練次數2400，總回報612.4666666666619


 48%|████████████████▍                 | 2410/5000 [6:22:01<12:32:32, 17.43s/it]


訓練次數2410，總回報549.3333333333284


 48%|████████████████▉                  | 2420/5000 [6:23:38<6:43:29,  9.38s/it]


訓練次數2420，總回報567.4242424242392


 49%|████████████████▌                 | 2430/5000 [6:25:55<12:50:41, 17.99s/it]


訓練次數2430，總回報417.83291925465585


 49%|█████████████████                  | 2440/5000 [6:28:12<9:55:31, 13.96s/it]


訓練次數2440，總回報549.2647058823499


 49%|█████████████████▏                 | 2450/5000 [6:30:07<8:22:50, 11.83s/it]


訓練次數2450，總回報504.15971223021177


 49%|████████████████▋                 | 2460/5000 [6:32:02<11:14:18, 15.93s/it]


訓練次數2460，總回報566.687499999996


 49%|█████████████████▎                 | 2470/5000 [6:33:38<8:33:57, 12.19s/it]


訓練次數2470，總回報477.6142857142821


 50%|█████████████████▎                 | 2480/5000 [6:34:44<7:30:07, 10.72s/it]


訓練次數2480，總回報855.4084967320098


 50%|█████████████████▍                 | 2490/5000 [6:36:38<9:35:05, 13.75s/it]


訓練次數2490，總回報709.1234899328767


 50%|█████████████████▌                 | 2500/5000 [6:38:16<8:34:31, 12.35s/it]


訓練次數2500，總回報717.4588235294059


 50%|█████████████████▌                 | 2510/5000 [6:40:07<9:20:22, 13.50s/it]


訓練次數2510，總回報379.1303370786507


 50%|█████████████████▏                | 2520/5000 [6:42:09<10:09:27, 14.74s/it]


訓練次數2520，總回報914.7797833934973


 51%|█████████████████▏                | 2530/5000 [6:44:06<10:00:05, 14.58s/it]


訓練次數2530，總回報688.3686567164106


 51%|█████████████████▎                | 2540/5000 [6:46:07<11:25:56, 16.73s/it]


訓練次數2540，總回報857.8113268608296


 51%|█████████████████▎                | 2550/5000 [6:47:51<10:19:33, 15.17s/it]


訓練次數2550，總回報916.4795847750812


 51%|█████████████████▍                | 2560/5000 [6:50:10<10:11:44, 15.04s/it]


訓練次數2560，總回報90.92710622710636


 51%|█████████████████▉                 | 2570/5000 [6:51:54<8:03:28, 11.94s/it]


訓練次數2570，總回報922.5528301886736


 52%|██████████████████                 | 2580/5000 [6:53:44<8:17:00, 12.32s/it]


訓練次數2580，總回報886.930742049459


 52%|██████████████████▏                | 2590/5000 [6:55:28<7:15:16, 10.84s/it]


訓練次數2590，總回報294.69813664596296


 52%|██████████████████▏                | 2600/5000 [6:57:00<7:12:28, 10.81s/it]


訓練次數2600，總回報264.7962962962973


 52%|██████████████████▎                | 2610/5000 [6:58:57<8:40:04, 13.06s/it]


訓練次數2610，總回報554.3421768707462


 52%|█████████████████▊                | 2620/5000 [7:01:06<11:16:57, 17.07s/it]


訓練次數2620，總回報545.3905109489024


 53%|██████████████████▍                | 2630/5000 [7:03:20<9:30:37, 14.45s/it]


訓練次數2630，總回報372.4569620253153


 53%|██████████████████▍                | 2640/5000 [7:04:57<6:24:54,  9.79s/it]


訓練次數2640，總回報155.0833855799379


 53%|██████████████████                | 2650/5000 [7:07:10<11:38:44, 17.84s/it]


訓練次數2650，總回報642.2794520547882


 53%|██████████████████                | 2660/5000 [7:09:16<11:28:47, 17.66s/it]


訓練次數2660，總回報674.9620915032593


 53%|██████████████████▋                | 2670/5000 [7:11:15<8:59:37, 13.90s/it]


訓練次數2670，總回報338.82499999999936


 54%|██████████████████▊                | 2680/5000 [7:12:51<7:57:33, 12.35s/it]


訓練次數2680，總回報291.44181184669003


 54%|██████████████████▊                | 2690/5000 [7:14:40<8:18:46, 12.96s/it]


訓練次數2690，總回報712.3418604651143


 54%|██████████████████▉                | 2700/5000 [7:16:46<8:06:42, 12.70s/it]


訓練次數2700，總回報724.0649122806932


 54%|██████████████████▉                | 2710/5000 [7:18:47<8:16:51, 13.02s/it]


訓練次數2710，總回報917.6371647509534


 54%|███████████████████                | 2720/5000 [7:20:31<8:06:43, 12.81s/it]


訓練次數2720，總回報590.3358974358941


 55%|██████████████████▌               | 2730/5000 [7:22:32<11:45:20, 18.64s/it]


訓練次數2730，總回報629.1249999999916


 55%|███████████████████▏               | 2740/5000 [7:24:04<6:20:54, 10.11s/it]


訓練次數2740，總回報43.93728813559314


 55%|███████████████████▎               | 2750/5000 [7:25:39<9:15:27, 14.81s/it]


訓練次數2750，總回報912.3199261992545


 55%|███████████████████▎               | 2760/5000 [7:27:52<9:47:31, 15.74s/it]


訓練次數2760，總回報877.2402877697721


 55%|███████████████████▍               | 2770/5000 [7:29:36<7:57:30, 12.85s/it]


訓練次數2770，總回報526.4294117647033


 56%|███████████████████▍               | 2780/5000 [7:31:29<8:42:01, 14.11s/it]


訓練次數2780，總回報697.5049382715949


 56%|███████████████████▌               | 2790/5000 [7:33:21<9:26:07, 15.37s/it]


訓練次數2790，總回報238.68633540372795


 56%|███████████████████               | 2800/5000 [7:35:51<11:36:29, 19.00s/it]


訓練次數2800，總回報905.4507936507777


 56%|███████████████████▋               | 2810/5000 [7:38:17<7:40:08, 12.61s/it]


訓練次數2810，總回報161.27346938775543


 56%|███████████████████▋               | 2820/5000 [7:40:14<9:07:07, 15.06s/it]


訓練次數2820，總回報890.0069597069539


 57%|███████████████████▊               | 2830/5000 [7:42:24<9:43:46, 16.14s/it]


訓練次數2830，總回報463.8821086261962


 57%|███████████████████▉               | 2840/5000 [7:43:39<6:24:20, 10.68s/it]


訓練次數2840，總回報140.00476190476238


 57%|███████████████████▉               | 2850/5000 [7:45:29<9:11:07, 15.38s/it]


訓練次數2850，總回報881.8206896551601


 57%|████████████████████               | 2860/5000 [7:47:23<9:36:21, 16.16s/it]


訓練次數2860，總回報899.6304075234913


 57%|████████████████████               | 2870/5000 [7:48:29<6:20:23, 10.72s/it]


訓練次數2870，總回報902.8993399339851


 58%|███████████████████▌              | 2880/5000 [7:51:00<11:40:05, 19.81s/it]


訓練次數2880，總回報857.7025641025534


 58%|████████████████████▏              | 2890/5000 [7:52:21<6:29:23, 11.07s/it]


訓練次數2890，總回報254.40000000000103


 58%|████████████████████▎              | 2900/5000 [7:54:17<6:49:24, 11.70s/it]


訓練次數2900，總回報457.3679738562071


 58%|████████████████████▎              | 2910/5000 [7:56:07<7:58:30, 13.74s/it]


訓練次數2910，總回報556.6315789473623


 58%|████████████████████▍              | 2920/5000 [7:57:54<6:24:33, 11.09s/it]


訓練次數2920，總回報374.16153846153725


 59%|████████████████████▌              | 2930/5000 [7:59:45<6:42:40, 11.67s/it]


訓練次數2930，總回報578.9671328671294


 59%|████████████████████▌              | 2940/5000 [8:01:32<7:13:47, 12.63s/it]


訓練次數2940，總回報735.1968838526839


 59%|████████████████████▋              | 2950/5000 [8:03:26<7:55:34, 13.92s/it]


訓練次數2950，總回報881.7481481481386


 59%|████████████████████▋              | 2960/5000 [8:04:57<5:07:35,  9.05s/it]


訓練次數2960，總回報184.62919708029278


 59%|████████████████████▊              | 2970/5000 [8:06:52<8:15:11, 14.64s/it]


訓練次數2970，總回報342.49470198675465


 60%|████████████████████▊              | 2980/5000 [8:08:32<6:51:52, 12.23s/it]


訓練次數2980，總回報914.7432432432278


 60%|████████████████████▉              | 2990/5000 [8:10:39<8:32:36, 15.30s/it]


訓練次數2990，總回報680.0416149068241


 60%|█████████████████████              | 3000/5000 [8:12:41<7:14:58, 13.05s/it]


訓練次數3000，總回報855.219335347414


 60%|█████████████████████              | 3010/5000 [8:14:26<7:21:28, 13.31s/it]


訓練次數3010，總回報438.90649350649244


 60%|█████████████████████▏             | 3020/5000 [8:16:00<7:35:36, 13.81s/it]


訓練次數3020，總回報276.32549019607933


 61%|█████████████████████▏             | 3030/5000 [8:17:58<6:36:17, 12.07s/it]


訓練次數3030，總回報256.189419795223


 61%|█████████████████████▎             | 3040/5000 [8:19:58<8:47:07, 16.14s/it]


訓練次數3040，總回報918.7739926739858


 61%|█████████████████████▎             | 3050/5000 [8:21:38<5:59:03, 11.05s/it]


訓練次數3050，總回報40.17567567567561


 61%|█████████████████████▍             | 3060/5000 [8:23:10<5:32:43, 10.29s/it]


訓練次數3060，總回報190.291304347827


 61%|█████████████████████▍             | 3070/5000 [8:24:40<4:40:22,  8.72s/it]


訓練次數3070，總回報215.56644295302078


 62%|█████████████████████▌             | 3080/5000 [8:26:51<8:04:53, 15.15s/it]


訓練次數3080，總回報494.27586206896


 62%|█████████████████████▋             | 3090/5000 [8:28:36<5:58:24, 11.26s/it]


訓練次數3090，總回報674.4571428571346


 62%|█████████████████████▋             | 3100/5000 [8:29:59<5:08:13,  9.73s/it]


訓練次數3100，總回報114.1704871060174


 62%|█████████████████████▊             | 3110/5000 [8:32:26<9:57:40, 18.97s/it]


訓練次數3110，總回報800.5696202531549


 62%|█████████████████████▊             | 3120/5000 [8:34:07<6:46:00, 12.96s/it]


訓練次數3120，總回報902.6313588850041


 63%|█████████████████████▉             | 3130/5000 [8:36:26<7:53:49, 15.20s/it]


訓練次數3130，總回報688.4305647840482


 63%|█████████████████████▉             | 3140/5000 [8:38:16<6:45:05, 13.07s/it]


訓練次數3140，總回報381.80901287553473


 63%|██████████████████████             | 3150/5000 [8:40:25<9:35:10, 18.65s/it]


訓練次數3150，總回報301.682890855457


 63%|██████████████████████             | 3160/5000 [8:42:12<7:24:01, 14.48s/it]


訓練次數3160，總回報226.2231884057976


 63%|██████████████████████▏            | 3170/5000 [8:44:36<9:12:51, 18.13s/it]


訓練次數3170，總回報907.6774834437011


 64%|██████████████████████▎            | 3180/5000 [8:46:19<5:46:45, 11.43s/it]


訓練次數3180，總回報100.06384364820866


 64%|██████████████████████▎            | 3190/5000 [8:48:42<7:26:25, 14.80s/it]


訓練次數3190，總回報353.330769230769


 64%|██████████████████████▍            | 3200/5000 [8:50:13<6:10:45, 12.36s/it]


訓練次數3200，總回報464.1718954248338


 64%|██████████████████████▍            | 3210/5000 [8:51:59<6:09:56, 12.40s/it]


訓練次數3210，總回報913.0432432432274


 64%|██████████████████████▌            | 3220/5000 [8:53:53<7:33:13, 15.28s/it]


訓練次數3220，總回報915.9825622775711


 65%|██████████████████████▌            | 3230/5000 [8:55:24<4:45:06,  9.66s/it]


訓練次數3230，總回報460.5034267912749


 65%|██████████████████████▋            | 3240/5000 [8:57:33<7:25:48, 15.20s/it]


訓練次數3240，總回報906.1825622775689


 65%|██████████████████████▊            | 3250/5000 [8:59:09<4:44:51,  9.77s/it]


訓練次數3250，總回報583.4307692307651


 65%|██████████████████████▊            | 3260/5000 [9:01:02<6:26:24, 13.32s/it]


訓練次數3260，總回報104.21212121212135


 65%|██████████████████████▉            | 3270/5000 [9:02:47<6:54:54, 14.39s/it]


訓練次數3270，總回報576.6722222222179


 66%|██████████████████████▉            | 3280/5000 [9:04:45<6:22:59, 13.36s/it]


訓練次數3280，總回報918.2555555555407


 66%|███████████████████████            | 3290/5000 [9:06:34<4:42:36,  9.92s/it]


訓練次數3290，總回報168.02627986348185


 66%|███████████████████████            | 3300/5000 [9:08:33<7:21:15, 15.57s/it]


訓練次數3300，總回報780.8163822525488


 66%|███████████████████████▏           | 3310/5000 [9:10:13<5:34:57, 11.89s/it]


訓練次數3310，總回報707.2458498023685


 66%|███████████████████████▏           | 3320/5000 [9:12:06<6:08:25, 13.16s/it]


訓練次數3320，總回報889.2036496350236


 67%|███████████████████████▎           | 3330/5000 [9:14:03<7:10:46, 15.48s/it]


訓練次數3330，總回報705.6052631578848


 67%|███████████████████████▍           | 3340/5000 [9:15:50<7:06:03, 15.40s/it]


訓練次數3340，總回報859.1284403669545


 67%|███████████████████████▍           | 3350/5000 [9:17:46<6:22:09, 13.90s/it]


訓練次數3350，總回報369.3655052264796


 67%|███████████████████████▌           | 3360/5000 [9:20:16<7:16:04, 15.95s/it]


訓練次數3360，總回報391.77278911564554


 67%|███████████████████████▌           | 3370/5000 [9:22:17<7:27:14, 16.46s/it]


訓練次數3370，總回報912.4271477663083


 68%|███████████████████████▋           | 3380/5000 [9:24:23<6:15:02, 13.89s/it]


訓練次數3380，總回報708.3098039215585


 68%|███████████████████████▋           | 3390/5000 [9:26:15<7:11:51, 16.09s/it]


訓練次數3390，總回報399.5730375426598


 68%|███████████████████████▊           | 3400/5000 [9:28:32<7:08:27, 16.07s/it]


訓練次數3400，總回報627.2841423948157


 68%|███████████████████████▊           | 3410/5000 [9:30:31<6:37:32, 15.00s/it]


訓練次數3410，總回報568.7790697674407


 68%|███████████████████████▉           | 3420/5000 [9:32:35<6:57:35, 15.86s/it]


訓練次數3420，總回報924.1178988326787


 69%|████████████████████████           | 3430/5000 [9:34:20<4:08:00,  9.48s/it]


訓練次數3430，總回報327.7484949832778


 69%|████████████████████████           | 3440/5000 [9:36:46<6:09:30, 14.21s/it]


訓練次數3440，總回報917.3078014184317


 69%|████████████████████████▏          | 3450/5000 [9:39:00<7:18:45, 16.98s/it]


訓練次數3450，總回報471.7806451612881


 69%|████████████████████████▏          | 3460/5000 [9:41:12<7:17:47, 17.06s/it]


訓練次數3460，總回報851.63118279569


 69%|████████████████████████▎          | 3470/5000 [9:42:25<3:28:47,  8.19s/it]


訓練次數3470，總回報457.62371541501864


 70%|████████████████████████▎          | 3480/5000 [9:44:32<8:01:05, 18.99s/it]


訓練次數3480，總回報850.4262295081863


 70%|████████████████████████▍          | 3490/5000 [9:46:38<7:55:46, 18.91s/it]


訓練次數3490，總回報916.2797833934976


 70%|████████████████████████▌          | 3500/5000 [9:48:50<5:59:37, 14.38s/it]


訓練次數3500，總回報450.41812297734225


 70%|████████████████████████▌          | 3510/5000 [9:51:01<6:37:33, 16.01s/it]


訓練次數3510，總回報913.5577464788582


 70%|████████████████████████▋          | 3520/5000 [9:52:59<5:26:45, 13.25s/it]


訓練次數3520，總回報470.0845070422522


 71%|████████████████████████▋          | 3530/5000 [9:55:24<7:11:19, 17.61s/it]


訓練次數3530，總回報507.94782608695454


 71%|████████████████████████▊          | 3540/5000 [9:57:15<6:30:05, 16.03s/it]


訓練次數3540，總回報924.5259842519624


 71%|████████████████████████▊          | 3550/5000 [9:59:19<6:04:08, 15.07s/it]


訓練次數3550，總回報371.0222222222217


 71%|████████████████████████▏         | 3560/5000 [10:01:05<6:31:50, 16.33s/it]


訓練次數3560，總回報919.8739926739862


 71%|████████████████████████▎         | 3570/5000 [10:03:19<5:57:17, 14.99s/it]


訓練次數3570，總回報775.2893992932782


 72%|████████████████████████▎         | 3580/5000 [10:05:54<7:32:58, 19.14s/it]


訓練次數3580，總回報755.9839721254274


 72%|████████████████████████▍         | 3590/5000 [10:08:13<5:33:47, 14.20s/it]


訓練次數3590，總回報482.3725752508343


 72%|████████████████████████▍         | 3600/5000 [10:10:17<5:13:34, 13.44s/it]


訓練次數3600，總回報350.0103896103896


 72%|████████████████████████▌         | 3610/5000 [10:12:24<6:10:04, 15.97s/it]


訓練次數3610，總回報784.9087452471379


 72%|████████████████████████▌         | 3620/5000 [10:14:29<6:55:22, 18.06s/it]


訓練次數3620，總回報528.9370919881267


 73%|████████████████████████▋         | 3630/5000 [10:16:04<4:15:51, 11.21s/it]


訓練次數3630，總回報508.4220338983009


 73%|████████████████████████▊         | 3640/5000 [10:18:13<5:19:48, 14.11s/it]


訓練次數3640，總回報534.9063829787206


 73%|████████████████████████▊         | 3650/5000 [10:20:24<5:36:15, 14.95s/it]


訓練次數3650，總回報917.050684931493


 73%|████████████████████████▉         | 3660/5000 [10:22:27<6:00:42, 16.15s/it]


訓練次數3660，總回報920.1199261992564


 73%|████████████████████████▉         | 3670/5000 [10:24:54<6:08:33, 16.63s/it]


訓練次數3670，總回報914.5315412186275


 74%|█████████████████████████         | 3680/5000 [10:26:33<3:55:56, 10.72s/it]


訓練次數3680，總回報305.5226148409896


 74%|█████████████████████████         | 3690/5000 [10:28:41<5:42:31, 15.69s/it]


訓練次數3690，總回報578.3230769230735


 74%|█████████████████████████▏        | 3700/5000 [10:30:53<4:50:10, 13.39s/it]


訓練次數3700，總回報318.7692307692311


 74%|█████████████████████████▏        | 3710/5000 [10:32:39<6:02:06, 16.84s/it]


訓練次數3710，總回報851.0887537993809


 74%|█████████████████████████▎        | 3720/5000 [10:34:11<4:07:01, 11.58s/it]


訓練次數3720，總回報743.3461538461465


 75%|█████████████████████████▎        | 3730/5000 [10:36:20<4:18:49, 12.23s/it]


訓練次數3730，總回報604.8116564417132


 75%|█████████████████████████▍        | 3740/5000 [10:38:17<5:06:53, 14.61s/it]


訓練次數3740，總回報596.3947712418241


 75%|█████████████████████████▌        | 3750/5000 [10:40:14<4:59:28, 14.37s/it]


訓練次數3750，總回報922.0093632958736


 75%|█████████████████████████▌        | 3760/5000 [10:42:19<5:41:58, 16.55s/it]


訓練次數3760，總回報883.5729729729555


 75%|█████████████████████████▋        | 3770/5000 [10:44:10<5:04:23, 14.85s/it]


訓練次數3770，總回報610.6524590163887


 76%|█████████████████████████▋        | 3780/5000 [10:46:10<4:29:18, 13.24s/it]


訓練次數3780，總回報515.9789808917138


 76%|█████████████████████████▊        | 3790/5000 [10:48:12<5:15:40, 15.65s/it]


訓練次數3790，總回報910.4853420195268


 76%|█████████████████████████▊        | 3800/5000 [10:49:57<4:13:54, 12.70s/it]


訓練次數3800，總回報122.01419141914232


 76%|█████████████████████████▉        | 3810/5000 [10:51:57<4:47:44, 14.51s/it]


訓練次數3810，總回報924.6948616600747


 76%|█████████████████████████▉        | 3820/5000 [10:53:58<5:20:58, 16.32s/it]


訓練次數3820，總回報622.0672131147493


 77%|██████████████████████████        | 3830/5000 [10:55:45<4:41:40, 14.45s/it]


訓練次數3830，總回報181.0303405572761


 77%|██████████████████████████        | 3840/5000 [10:57:29<4:40:16, 14.50s/it]


訓練次數3840，總回報915.6972789115591


 77%|██████████████████████████▏       | 3850/5000 [10:59:38<5:30:01, 17.22s/it]


訓練次數3850，總回報509.32439024390015


 77%|██████████████████████████▏       | 3860/5000 [11:01:33<4:50:02, 15.27s/it]


訓練次數3860，總回報861.9546762589815


 77%|██████████████████████████▎       | 3870/5000 [11:03:32<4:10:21, 13.29s/it]


訓練次數3870，總回報919.452830188673


 78%|██████████████████████████▍       | 3880/5000 [11:05:35<3:58:43, 12.79s/it]


訓練次數3880，總回報906.3630824372652


 78%|██████████████████████████▍       | 3890/5000 [11:07:36<4:56:59, 16.05s/it]


訓練次數3890，總回報527.8027190332255


 78%|██████████████████████████▌       | 3900/5000 [11:09:31<4:49:33, 15.79s/it]


訓練次數3900，總回報916.5328621908017


 78%|██████████████████████████▌       | 3910/5000 [11:11:43<5:17:55, 17.50s/it]


訓練次數3910，總回報869.0438127090165


 78%|██████████████████████████▋       | 3920/5000 [11:13:43<4:47:48, 15.99s/it]


訓練次數3920，總回報734.9511254019193


 79%|██████████████████████████▋       | 3930/5000 [11:15:28<4:14:26, 14.27s/it]


訓練次數3930，總回報918.2925925925812


 79%|██████████████████████████▊       | 3940/5000 [11:17:09<3:16:16, 11.11s/it]


訓練次數3940，總回報330.9545994065269


 79%|██████████████████████████▊       | 3950/5000 [11:19:05<3:49:04, 13.09s/it]


訓練次數3950，總回報155.87058823529443


 79%|██████████████████████████▉       | 3960/5000 [11:20:44<4:43:24, 16.35s/it]


訓練次數3960，總回報914.782562277571


 79%|██████████████████████████▉       | 3970/5000 [11:22:43<4:27:59, 15.61s/it]


訓練次數3970，總回報837.0987654320819


 80%|███████████████████████████       | 3980/5000 [11:24:49<4:07:19, 14.55s/it]


訓練次數3980，總回報226.4102564102573


 80%|███████████████████████████▏      | 3990/5000 [11:26:43<3:58:36, 14.18s/it]


訓練次數3990，總回報911.1313588850061


 80%|███████████████████████████▏      | 4000/5000 [11:28:48<4:24:34, 15.87s/it]


訓練次數4000，總回報911.9432432432274


 80%|███████████████████████████▎      | 4010/5000 [11:31:09<4:33:30, 16.58s/it]


訓練次數4010，總回報688.4950191570845


 80%|███████████████████████████▎      | 4020/5000 [11:33:27<5:00:55, 18.42s/it]


訓練次數4020，總回報899.3271604938095


 81%|███████████████████████████▍      | 4030/5000 [11:36:01<5:35:35, 20.76s/it]


訓練次數4030，總回報732.3809523809406


 81%|███████████████████████████▍      | 4040/5000 [11:37:59<3:51:04, 14.44s/it]


訓練次數4040，總回報296.4611111111117


 81%|███████████████████████████▌      | 4050/5000 [11:40:08<4:29:59, 17.05s/it]


訓練次數4050，總回報906.090851735006


 81%|███████████████████████████▌      | 4060/5000 [11:41:56<3:33:32, 13.63s/it]


訓練次數4060，總回報254.61428571428704


 81%|███████████████████████████▋      | 4070/5000 [11:43:56<3:34:45, 13.85s/it]


訓練次數4070，總回報570.6081272084776


 82%|███████████████████████████▋      | 4080/5000 [11:46:36<4:53:54, 19.17s/it]


訓練次數4080，總回報508.46417910447485


 82%|███████████████████████████▊      | 4090/5000 [11:48:40<3:33:49, 14.10s/it]


訓練次數4090，總回報757.7461538461476


 82%|███████████████████████████▉      | 4100/5000 [11:51:01<4:59:55, 20.00s/it]


訓練次數4100，總回報748.992052980127


 82%|███████████████████████████▉      | 4110/5000 [11:53:17<4:24:31, 17.83s/it]


訓練次數4110，總回報915.6007299269933


 82%|████████████████████████████      | 4120/5000 [11:55:06<3:14:34, 13.27s/it]


訓練次數4120，總回報372.3147540983598


 83%|████████████████████████████      | 4130/5000 [11:56:51<3:30:32, 14.52s/it]


訓練次數4130，總回報478.45280528052615


 83%|████████████████████████████▏     | 4140/5000 [11:58:58<4:00:33, 16.78s/it]


訓練次數4140，總回報916.8812030075051


 83%|████████████████████████████▏     | 4150/5000 [12:00:57<3:22:59, 14.33s/it]


訓練次數4150，總回報916.3203389830406


 83%|████████████████████████████▎     | 4160/5000 [12:02:51<3:16:01, 14.00s/it]


訓練次數4160，總回報788.0465949820716


 83%|████████████████████████████▎     | 4170/5000 [12:05:12<4:08:31, 17.97s/it]


訓練次數4170，總回報901.888819875761


 84%|████████████████████████████▍     | 4180/5000 [12:06:55<2:58:30, 13.06s/it]


訓練次數4180，總回報745.7548387096721


 84%|████████████████████████████▍     | 4190/5000 [12:09:20<4:15:25, 18.92s/it]


訓練次數4190，總回報920.7199261992565


 84%|████████████████████████████▌     | 4200/5000 [12:10:58<2:56:11, 13.21s/it]


訓練次數4200，總回報708.3958762886515


 84%|████████████████████████████▋     | 4210/5000 [12:12:43<2:11:01,  9.95s/it]


訓練次數4210，總回報510.88831168830956


 84%|████████████████████████████▋     | 4220/5000 [12:14:22<2:34:32, 11.89s/it]


訓練次數4220，總回報812.0949640287661


 85%|████████████████████████████▊     | 4230/5000 [12:16:30<3:25:32, 16.02s/it]


訓練次數4230，總回報534.7852459016358


 85%|████████████████████████████▊     | 4240/5000 [12:18:03<2:18:17, 10.92s/it]


訓練次數4240，總回報500.35079365079184


 85%|████████████████████████████▉     | 4250/5000 [12:20:55<4:09:18, 19.95s/it]


訓練次數4250，總回報913.5885906040129


 85%|████████████████████████████▉     | 4260/5000 [12:22:42<2:59:11, 14.53s/it]


訓練次數4260，總回報633.8751677852295


 85%|█████████████████████████████     | 4270/5000 [12:24:47<3:13:47, 15.93s/it]


訓練次數4270，總回報620.3038022813632


 86%|█████████████████████████████     | 4280/5000 [12:26:54<2:37:51, 13.16s/it]


訓練次數4280，總回報903.2483870967661


 86%|█████████████████████████████▏    | 4290/5000 [12:28:41<2:42:13, 13.71s/it]


訓練次數4290，總回報568.7216216216174


 86%|█████████████████████████████▏    | 4300/5000 [12:30:33<2:35:11, 13.30s/it]


訓練次數4300，總回報458.921126760562


 86%|█████████████████████████████▎    | 4310/5000 [12:32:18<2:35:48, 13.55s/it]


訓練次數4310，總回報669.8639097744286


 86%|█████████████████████████████▍    | 4320/5000 [12:34:18<2:56:27, 15.57s/it]


訓練次數4320，總回報726.0551724137841


 87%|█████████████████████████████▍    | 4330/5000 [12:36:32<3:01:28, 16.25s/it]


訓練次數4330，總回報379.27692307692286


 87%|█████████████████████████████▌    | 4340/5000 [12:38:41<3:05:11, 16.84s/it]


訓練次數4340，總回報919.5093632958732


 87%|█████████████████████████████▌    | 4350/5000 [12:40:32<2:36:02, 14.40s/it]


訓練次數4350，總回報556.9999999999953


 87%|█████████████████████████████▋    | 4360/5000 [12:41:58<1:43:46,  9.73s/it]


訓練次數4360，總回報54.70660066006592


 87%|█████████████████████████████▋    | 4370/5000 [12:43:47<2:17:22, 13.08s/it]


訓練次數4370，總回報636.2327645051116


 88%|█████████████████████████████▊    | 4380/5000 [12:45:34<2:44:48, 15.95s/it]


訓練次數4380，總回報899.5298507462533


 88%|█████████████████████████████▊    | 4390/5000 [12:47:42<2:30:19, 14.79s/it]


訓練次數4390，總回報658.2635451504946


 88%|█████████████████████████████▉    | 4400/5000 [12:49:40<2:39:39, 15.97s/it]


訓練次數4400，總回報839.8408163265208


 88%|█████████████████████████████▉    | 4410/5000 [12:51:42<2:58:58, 18.20s/it]


訓練次數4410，總回報795.6868686868575


 88%|██████████████████████████████    | 4420/5000 [12:54:02<2:31:48, 15.70s/it]


訓練次數4420，總回報908.1333333333255


 89%|██████████████████████████████    | 4430/5000 [12:55:52<1:28:51,  9.35s/it]


訓練次數4430，總回報61.06435986159163


 89%|██████████████████████████████▏   | 4440/5000 [12:57:29<2:15:45, 14.55s/it]


訓練次數4440，總回報908.1106918238826


 89%|██████████████████████████████▎   | 4450/5000 [12:59:05<2:06:55, 13.85s/it]


訓練次數4450，總回報529.2927152317848


 89%|██████████████████████████████▎   | 4460/5000 [13:01:01<1:57:40, 13.07s/it]


訓練次數4460，總回報312.3821192052982


 89%|██████████████████████████████▍   | 4470/5000 [13:03:28<2:49:55, 19.24s/it]


訓練次數4470，總回報695.9122807017441


 90%|██████████████████████████████▍   | 4480/5000 [13:05:41<1:56:22, 13.43s/it]


訓練次數4480，總回報199.51433021806943


 90%|██████████████████████████████▌   | 4490/5000 [13:08:02<2:33:55, 18.11s/it]


訓練次數4490，總回報660.2571428571359


 90%|██████████████████████████████▌   | 4500/5000 [13:10:07<2:21:53, 17.03s/it]


訓練次數4500，總回報493.79230769230486


 90%|██████████████████████████████▋   | 4510/5000 [13:11:55<1:57:50, 14.43s/it]


訓練次數4510，總回報918.5057553956709


 90%|██████████████████████████████▋   | 4520/5000 [13:13:48<1:55:39, 14.46s/it]


訓練次數4520，總回報919.4650557620724


 91%|██████████████████████████████▊   | 4530/5000 [13:16:08<2:02:03, 15.58s/it]


訓練次數4530，總回報918.5779922779785


 91%|██████████████████████████████▊   | 4540/5000 [13:18:04<1:45:05, 13.71s/it]


訓練次數4540，總回報572.8975778546688


 91%|██████████████████████████████▉   | 4550/5000 [13:20:00<1:46:14, 14.17s/it]


訓練次數4550，總回報858.9641221373997


 91%|███████████████████████████████   | 4560/5000 [13:22:34<2:09:49, 17.70s/it]


訓練次數4560，總回報916.4355371900764


 91%|███████████████████████████████   | 4570/5000 [13:24:46<2:08:08, 17.88s/it]


訓練次數4570，總回報854.6932203389722


 92%|███████████████████████████████▏  | 4580/5000 [13:27:05<1:55:29, 16.50s/it]


訓練次數4580，總回報845.1515923566731


 92%|███████████████████████████████▏  | 4590/5000 [13:29:28<2:14:59, 19.76s/it]


訓練次數4590，總回報913.7064935064786


 92%|███████████████████████████████▎  | 4600/5000 [13:31:45<1:36:13, 14.43s/it]


訓練次數4600，總回報466.08701298701163


 92%|███████████████████████████████▎  | 4610/5000 [13:33:24<1:21:57, 12.61s/it]


訓練次數4610，總回報673.0411764705821


 92%|███████████████████████████████▍  | 4620/5000 [13:35:17<1:38:14, 15.51s/it]


訓練次數4620，總回報742.0438356164283


 93%|███████████████████████████████▍  | 4630/5000 [13:36:40<1:15:44, 12.28s/it]


訓練次數4630，總回報913.600729926993


 93%|███████████████████████████████▌  | 4640/5000 [13:38:38<1:29:36, 14.93s/it]


訓練次數4640，總回報918.895437262343


 93%|███████████████████████████████▌  | 4650/5000 [13:40:35<1:26:26, 14.82s/it]


訓練次數4650，總回報915.5328621908005


 93%|███████████████████████████████▋  | 4660/5000 [13:42:52<1:32:45, 16.37s/it]


訓練次數4660，總回報905.3640522875628


 93%|███████████████████████████████▊  | 4670/5000 [13:45:15<1:23:59, 15.27s/it]


訓練次數4670，總回報127.65719063545193


 94%|███████████████████████████████▊  | 4680/5000 [13:47:03<1:06:02, 12.38s/it]


訓練次數4680，總回報80.73684210526325


 94%|█████████████████████████████████▊  | 4690/5000 [13:48:39<54:24, 10.53s/it]


訓練次數4690，總回報525.3680851063805


 94%|███████████████████████████████▉  | 4700/5000 [13:50:36<1:04:51, 12.97s/it]


訓練次數4700，總回報702.5124999999946


 94%|████████████████████████████████  | 4710/5000 [13:52:29<1:03:27, 13.13s/it]


訓練次數4710，總回報504.5597122302118


 94%|████████████████████████████████  | 4720/5000 [13:54:40<1:11:55, 15.41s/it]


訓練次數4720，總回報523.4265734265716


 95%|████████████████████████████████▏ | 4730/5000 [13:56:39<1:03:14, 14.05s/it]


訓練次數4730，總回報903.6853420195262


 95%|██████████████████████████████████▏ | 4740/5000 [13:58:46<57:24, 13.25s/it]


訓練次數4740，總回報84.3399293286219


 95%|██████████████████████████████████▏ | 4750/5000 [14:00:19<52:55, 12.70s/it]


訓練次數4750，總回報415.66296296296207


 95%|████████████████████████████████▎ | 4760/5000 [14:02:30<1:02:46, 15.69s/it]


訓練次數4760，總回報882.9724738675851


 95%|████████████████████████████████▍ | 4770/5000 [14:04:57<1:13:13, 19.10s/it]


訓練次數4770，總回報912.1494584837515


 96%|██████████████████████████████████▍ | 4780/5000 [14:06:46<39:02, 10.65s/it]


訓練次數4780，總回報127.2534050179215


 96%|████████████████████████████████▌ | 4790/5000 [14:09:02<1:00:16, 17.22s/it]


訓練次數4790，總回報924.635483870958


 96%|██████████████████████████████████▌ | 4800/5000 [14:10:57<41:17, 12.39s/it]


訓練次數4800，總回報523.3971119133538


 96%|██████████████████████████████████▋ | 4810/5000 [14:12:31<43:03, 13.60s/it]


訓練次數4810，總回報514.3503759398452


 96%|██████████████████████████████████▋ | 4820/5000 [14:14:00<35:34, 11.86s/it]


訓練次數4820，總回報506.39999999999753


 97%|██████████████████████████████████▊ | 4830/5000 [14:16:03<38:54, 13.73s/it]


訓練次數4830，總回報919.9315412186287


 97%|██████████████████████████████████▊ | 4840/5000 [14:17:56<34:28, 12.93s/it]


訓練次數4840，總回報922.8199261992569


 97%|██████████████████████████████████▉ | 4850/5000 [14:19:38<32:45, 13.10s/it]


訓練次數4850，總回報346.4684210526303


 97%|██████████████████████████████████▉ | 4860/5000 [14:21:16<29:53, 12.81s/it]


訓練次數4860，總回報624.4192052980102


 97%|███████████████████████████████████ | 4870/5000 [14:23:09<32:23, 14.95s/it]


訓練次數4870，總回報903.9106918238814


 98%|███████████████████████████████████▏| 4880/5000 [14:25:08<26:16, 13.13s/it]


訓練次數4880，總回報925.8317991631692


 98%|███████████████████████████████████▏| 4890/5000 [14:26:48<20:01, 10.93s/it]


訓練次數4890，總回報98.79534050179225


 98%|███████████████████████████████████▎| 4900/5000 [14:28:41<25:02, 15.02s/it]


訓練次數4900，總回報417.57447447447225


 98%|███████████████████████████████████▎| 4910/5000 [14:30:40<20:14, 13.49s/it]


訓練次數4910，總回報684.6525423728744


 98%|███████████████████████████████████▍| 4920/5000 [14:32:22<17:28, 13.11s/it]


訓練次數4920，總回報393.7104377104352


 99%|███████████████████████████████████▍| 4930/5000 [14:34:08<16:18, 13.98s/it]


訓練次數4930，總回報908.2853420195269


 99%|███████████████████████████████████▌| 4940/5000 [14:35:50<13:22, 13.38s/it]


訓練次數4940，總回報797.8318681318617


 99%|███████████████████████████████████▋| 4950/5000 [14:37:59<15:03, 18.07s/it]


訓練次數4950，總回報914.1203389830401


 99%|███████████████████████████████████▋| 4960/5000 [14:40:12<10:51, 16.28s/it]


訓練次數4960，總回報65.89077490774902


 99%|███████████████████████████████████▊| 4970/5000 [14:41:58<07:25, 14.85s/it]


訓練次數4970，總回報861.2968503936954


100%|███████████████████████████████████▊| 4980/5000 [14:44:14<05:35, 16.80s/it]


訓練次數4980，總回報843.8635761589322


100%|███████████████████████████████████▉| 4990/5000 [14:46:36<03:19, 19.94s/it]


訓練次數4990，總回報915.9554817275695


100%|████████████████████████████████████| 5000/5000 [14:48:38<00:00, 10.66s/it]


訓練次數5000，總回報905.357142857129





In [None]:
# Call Record() on Agent once the training loop has finished.
# NOTE(review): Record is defined outside this cell; presumably it runs and
# records one evaluation episode and reports its total return (the single
# number shown as this cell's output) — confirm against its definition.
Agent.Record()

916.2242424242297
