# import 必要套件

In [None]:
import random
import time,math
import numpy as np
import gymnasium as gym
import gymnasium.wrappers as gym_wrap
import matplotlib.pyplot as plt
import matplotlib.animation as animation #輸出動畫影片
from IPython import display
from tqdm import tqdm

In [None]:
import torch
import torch.nn.functional as F
import collections
# Pick the compute device once: the first CUDA GPU when available, else the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')

In [None]:
class ImageEnv(gym.Wrapper):
  """Frame-skipping, frame-stacking wrapper for an image-observation env.

  Every raw frame is cropped to ``[:72, 12:84]`` and rescaled with
  ``/255.0 - 0.5``; the observation handed to the agent is the last
  ``stack_frames`` processed frames stacked on axis 0.

  Args:
    env: The environment to wrap. Assumed to return 2-D grayscale frames
      at least 72x84 in size (TODO confirm against the wrapped env).
    stack_frames: Number of frames kept in the stack. The same number is
      used as the action-repeat count in ``step``.
    delay_op: Number of steps taken with action ``0`` right after a reset
      before the first observation is returned (presumably to skip the
      intro animation; verify against the env).
  """

  def __init__(self,env,stack_frames=4,delay_op=50):
    super(ImageEnv, self).__init__(env)
    self.delay_op = delay_op
    self.stack_frames = stack_frames

  @staticmethod
  def _preprocess(frame):
    """Crop a raw frame to 72x72 and shift its scaled values by -0.5."""
    return frame[:72, 12:84]/255.0-0.5

  def reset(self, *, seed=None, options=None):
    """Reset the env, skip ``delay_op`` steps, and return the initial stack.

    ``seed`` and ``options`` are forwarded to the wrapped env so the wrapper
    stays compatible with the Gymnasium ``reset`` signature.

    Returns:
      ``(stacked_state, info)`` where ``stacked_state`` has shape
      ``[stack_frames, 72, 72]`` (one frame repeated).
    """
    s, info = self.env.reset(seed=seed, options=options)
    for _ in range(self.delay_op):
      s, _, _, _, info = self.env.step(0)
    # Preprocess only the final frame, once. This also covers delay_op == 0,
    # where the frame returned by reset() itself seeds the stack.
    self.stacked_state = np.tile(self._preprocess(s), (self.stack_frames,1,1))  # [stack_frames, 72, 72]
    return self.stacked_state, info

  def step(self, action):
    """Repeat ``action`` up to ``stack_frames`` times and sum the rewards.

    A raw reward of exactly ``-100`` is treated as terminal. Each frame,
    including the one that ends the episode, is pushed onto the stack so
    the returned observation always reflects the latest env state.

    Returns:
      ``(stacked_state, reward, terminated, truncated, info)``.
    """
    reward = 0
    for _ in range(self.stack_frames):
      s, r, terminated, truncated, info = self.env.step(action)
      if r == -100:
        terminated = True
      reward += r
      # Drop the oldest frame and append the newest before any early exit.
      self.stacked_state = np.concatenate(
          (self.stacked_state[1:], self._preprocess(s)[np.newaxis]), axis=0)
      if terminated or truncated:
        break
    return self.stacked_state, reward, terminated, truncated, info

# 建立Replay Buffer類別

In [None]:
class ReplayBuffer:
  """Fixed-capacity ring buffer of ``(s, a, r, s_, done)`` transitions.

  Storage is pre-allocated as NumPy arrays; once ``max_size`` transitions
  have been written, the oldest entries are overwritten.

  Args:
    max_size: Maximum number of transitions kept.
    num_steps: Stored as-is on the instance for callers to read; not used
      inside this class.
    state_shape: Shape of a single state. Defaults to ``(4, 72, 72)``.
  """

  def __init__(self,max_size=int(1e5), num_steps=1, state_shape=(4,72,72)):
    max_size = int(max_size)
    if max_size <= 0:
      raise ValueError("max_size must be a positive integer")
    state_shape = tuple(state_shape)
    self.s = np.zeros((max_size, *state_shape), dtype=np.float32)
    self.a = np.zeros((max_size,), dtype=np.int64)
    self.r = np.zeros((max_size, 1), dtype=np.float32)
    self.s_ = np.zeros((max_size, *state_shape), dtype=np.float32)
    self.done = np.zeros((max_size, 1), dtype=np.float32)
    self.ptr = 0    # index of the next slot to write
    self.size = 0   # number of valid transitions currently stored
    self.max_size = max_size
    self.num_steps = num_steps

  def append(self,s,a,r,s_,done):
    """Store one transition, overwriting the oldest one when full."""
    self.s[self.ptr] = s
    self.a[self.ptr] = a
    self.r[self.ptr] = r
    self.s_[self.ptr] = s_
    self.done[self.ptr] = done
    self.ptr = (self.ptr + 1) % self.max_size
    self.size = min(self.size+1,self.max_size)

  def sample(self, batch_size):
    """Draw ``batch_size`` transitions uniformly at random, with replacement.

    Returns:
      Tuple of tensors ``(s, a, r, s_, done)``: float32 states of shape
      ``[batch_size, *state_shape]``, int64 actions of shape
      ``[batch_size]``, and float32 rewards / done flags of shape
      ``[batch_size, 1]``.

    Raises:
      ValueError: If the buffer is empty.
    """
    if self.size == 0:
      raise ValueError("cannot sample from an empty ReplayBuffer")
    ind = np.random.randint(0, self.size, batch_size)
    return (
        torch.FloatTensor(self.s[ind]),
        torch.LongTensor(self.a[ind]),
        torch.FloatTensor(self.r[ind]),
        torch.FloatTensor(self.s_[ind]),
        torch.FloatTensor(self.done[ind]),
    )

# 搭建DQN神經網路的類別

In [None]:
class DQN(torch.nn.Module):
  """Small convolutional Q-network: two conv layers followed by two linear layers.

  Args:
    n_act: Number of discrete actions, i.e. the width of the output layer.
  """

  def __init__(self, n_act):
    super().__init__()
    nn = torch.nn
    # Shapes below are for a 72x72 input with 4 stacked channels.
    self.conv1 = nn.Conv2d(4, 16, kernel_size=4, stride=4)   # [N,4,72,72] -> [N,16,18,18]
    self.conv2 = nn.Conv2d(16, 32, kernel_size=4, stride=2)  # [N,16,18,18] -> [N,32,8,8]
    self.fc1 = nn.Linear(32 * 8 * 8, 256)
    self.fc2 = nn.Linear(256, n_act)

  def forward(self, x):
    """Return one Q-value per action, shape ``[N, n_act]``."""
    features = F.relu(self.conv2(F.relu(self.conv1(x))))
    # view(-1, ...) rather than a batch-preserving flatten, so an input
    # without a batch dimension still comes out as a single row.
    flat = features.view(-1, self.fc1.in_features)
    # NOTE(review): fc1 feeds fc2 with no activation in between, so the two
    # layers compose into one linear map — confirm this is intended.
    return self.fc2(self.fc1(flat))

# 設定是否載入模型參數，舊參數檔路徑，新參數檔路徑

In [None]:
# Episode number of the checkpoint to resume from; 0 starts a fresh run.
Load_File = 0
# Path of the model checkpoint matching Load_File.
Old_File = f"Model-{Load_File}.pt"
# Training history: restored from the matching log file when resuming,
# otherwise started empty.
# NOTE(review): allow_pickle=True unpickles the file — only load trusted logs.
Log = (
    np.load(f"Log-{Load_File}.npy", allow_pickle=True).item()
    if Load_File > 0
    else {"TrainReward":[],"TestReward":[],"Loss":[]}
)

In [None]:
# Discrete-action CarRacing, rendered to RGB arrays, converted to grayscale,
# then wrapped with the frame-skip / frame-stack preprocessing of ImageEnv.
env = ImageEnv(
    gym_wrap.GrayscaleObservation(
        gym.make(
            'CarRacing-v3',
            render_mode="rgb_array",
            domain_randomize=False,
            continuous=False,
        )
    )
)

# 搭建智能體Agent的類別

In [None]:
class DQNAgent():
  """Double-DQN agent bound to the module-level ``env``.

  Holds an online network (``PredictDQN``), a periodically synced target
  network (``TargetDQN``), an Adam optimizer, and a replay buffer. When the
  module-level ``Load_File`` is positive, both networks start from the
  checkpoint at ``Old_File``.

  Args:
    gamma: Discount factor used in the TD target.
    eps_low: Floor of the exponentially decaying exploration rate.
    lr: Adam learning rate.
  """

  def __init__(self,gamma=0.9,eps_low=0.1,lr=0.00025):
    self.env = env
    self.n_act = self.env.action_space.n
    self.PredictDQN = DQN(self.n_act)
    self.TargetDQN = DQN(self.n_act)
    if Load_File > 0:
      # Read the checkpoint once; map_location lets a checkpoint saved on a
      # GPU be restored on a CPU-only host (and vice versa).
      state = torch.load(Old_File, map_location=device)
      self.PredictDQN.load_state_dict(state)
      self.TargetDQN.load_state_dict(state)
    self.PredictDQN.to(device)
    self.TargetDQN.to(device)
    self.LossFun = torch.nn.SmoothL1Loss()
    self.optimizer = torch.optim.Adam(self.PredictDQN.parameters(),lr=lr)
    self.gamma = gamma
    self.eps_low = eps_low
    # Start fully exploratory so SelectA is usable before Train() sets the
    # schedule (the schedule itself also starts at 1.0 for episode 0).
    self.EPS = 1.0
    self.rb = ReplayBuffer(max_size=10000, num_steps=1)

  def PredictA(self,s):
    """Return the greedy action (int) for a single state ``s``."""
    with torch.no_grad():
      # Add an explicit batch dimension of 1 before the forward pass.
      x = torch.FloatTensor(s).to(device).unsqueeze(0)
      return torch.argmax(self.PredictDQN(x)).item()

  def SelectA(self,a):
    """Epsilon-greedy: return a random action with probability ``EPS``, else ``a``."""
    return self.env.action_space.sample() if np.random.random()<self.EPS else a

  def Train(self,N_EPISODES):
    """Run training episodes ``Load_File .. N_EPISODES-1``.

    Every 10th episode the greedy policy is evaluated, and the model and
    the ``Log`` dict are written to ``Model-<n>.pt`` / ``Log-<n>.npy``.
    """
    for i in tqdm(range(Load_File,N_EPISODES)):
      self.EPS = self.eps_low+(1-self.eps_low)*math.exp(-i*12/(N_EPISODES))
      # Sync the target network once per 20 episodes. This check used to sit
      # inside the step loop, which re-copied the weights on every single
      # step of such an episode.
      if i % 20 == 0:
        self.TargetDQN.load_state_dict(self.PredictDQN.state_dict())
      total_reward = 0
      s,_ = self.env.reset()
      while True:
        a = self.SelectA(self.PredictA(s))
        s_,r,done,stop,_ = self.env.step(a)
        # Only true termination is stored as `done`; truncated episodes
        # still bootstrap from s_.
        self.rb.append(s,a,r,s_,done)
        # NOTE(review): the modulo is taken on the episode index, not a step
        # counter; with num_steps=1 it is always true — confirm intent.
        if self.rb.size > 200 and i%self.rb.num_steps==0:
          self.Learn()
        s = s_
        total_reward += r
        if done or stop:
          break
      Log["TrainReward"].append(total_reward)
      if i % 10 == 9:
        test_reward = self.Test()
        print(f"\n訓練次數{i+1}，總回報{test_reward}")
        Log["TestReward"].append(test_reward)
        torch.save(self.PredictDQN.state_dict(), f"Model-{i+1}.pt")
        np.save(f"Log-{i+1}.npy", Log)

  def Learn(self, batch_size=32):
    """Do one Double-DQN gradient step on a sampled minibatch.

    The online network picks the next action; the target network scores it.
    The loss value is appended to ``Log["Loss"]``.
    """
    # Move every tensor to the device exactly once.
    batch_s, batch_a, batch_r, batch_s_, batch_done = (
        t.to(device) for t in self.rb.sample(batch_size))
    predict_Q = self.PredictDQN(batch_s).gather(1, batch_a.unsqueeze(1))
    with torch.no_grad():
      a_ = self.PredictDQN(batch_s_).argmax(dim=1, keepdim=True)
      next_Q = self.TargetDQN(batch_s_).gather(1, a_)
      target_Q = batch_r+(1-batch_done)*self.gamma*next_Q
    loss = self.LossFun(predict_Q, target_Q)
    Log["Loss"].append(float(loss))
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

  def Test(self,VIDEO=False):
    """Play one greedy episode and return its total reward.

    When ``VIDEO`` is true, the rendered frames are saved as an animation
    to ``Car_Racing.mp4``. Frames are only rendered in that case.
    """
    total_reward = 0
    video = []
    s,_ = self.env.reset()
    while True:
      if VIDEO:
        video.append(self.env.render())
      a = self.PredictA(s)
      s,r,done,stop,_ = self.env.step(a)
      total_reward += r
      if done or stop:
        break
    if VIDEO:
      patch = plt.imshow(video[0])  # image artist that shows the frames
      plt.axis('off')               # hide the axes
      def animate(i):               # swap in frame i
        patch.set_data(video[i])
      # plt.gcf(): current figure; frames: frame count; interval: ms per frame
      anim = animation.FuncAnimation(plt.gcf(),animate,frames=len(video),interval=200)
      anim.save('Car_Racing.mp4')   # write the mp4 file
    return total_reward

  def Record(self):
    """Play one greedy episode, showing each frame and printing each reward."""
    total_reward = 0
    s,_ = self.env.reset()
    while True:
      image = self.env.render()
      plt.imshow(image)
      a = self.PredictA(s)
      s,r,done,stop,_ = self.env.step(a)
      print(r)
      total_reward += r
      plt.pause(0.1)
      # clear the current notebook output before drawing the next frame
      display.clear_output(wait=True)
      if done or stop:
        break
    print(total_reward)

In [None]:
# Build the agent with this run's hyper-parameters and train it.
Agent = DQNAgent(
    gamma=0.95,
    eps_low=0.05,
    lr=0.00025,
)
Agent.Train(N_EPISODES=5000)

  0%|                                      | 10/5000 [01:59<21:32:48, 15.54s/it]


訓練次數10，總回報-94.99999999999895


  0%|▏                                     | 20/5000 [04:18<23:06:03, 16.70s/it]


訓練次數20，總回報-94.99999999999895


  1%|▏                                     | 30/5000 [06:07<17:44:49, 12.86s/it]


訓練次數30，總回報-94.99999999999895


  1%|▎                                     | 40/5000 [07:56<18:41:54, 13.57s/it]


訓練次數40，總回報-94.99999999999896


  1%|▍                                     | 50/5000 [10:02<21:59:20, 15.99s/it]


訓練次數50，總回報-94.999999999999


  1%|▍                                     | 60/5000 [11:55<20:40:11, 15.06s/it]


訓練次數60，總回報-74.5221843003414


  1%|▌                                     | 70/5000 [13:31<15:36:12, 11.39s/it]


訓練次數70，總回報8.594339622641527


  2%|▌                                     | 80/5000 [15:13<20:21:00, 14.89s/it]


訓練次數80，總回報-94.99999999999898


  2%|▋                                     | 90/5000 [16:52<19:14:34, 14.11s/it]


訓練次數90，總回報-94.99999999999898


  2%|▊                                     | 100/5000 [18:11<9:32:41,  7.01s/it]


訓練次數100，總回報6.000000000000018


  2%|▊                                    | 110/5000 [19:26<16:27:37, 12.12s/it]


訓練次數110，總回報-94.99999999999903


  2%|▉                                    | 120/5000 [20:52<15:54:16, 11.73s/it]


訓練次數120，總回報-94.99999999999903


  3%|▉                                    | 130/5000 [22:09<14:03:22, 10.39s/it]


訓練次數130，總回報-94.999999999999


  3%|█                                    | 140/5000 [23:48<18:04:15, 13.39s/it]


訓練次數140，總回報10.867844522968214


  3%|█                                    | 150/5000 [25:27<16:42:01, 12.40s/it]


訓練次數150，總回報2.380622837370245


  3%|█▏                                    | 160/5000 [26:27<9:59:05,  7.43s/it]


訓練次數160，總回報2.363444108761331


  3%|█▎                                   | 170/5000 [27:44<11:23:14,  8.49s/it]


訓練次數170，總回報16.80645161290323


  4%|█▎                                   | 180/5000 [29:12<10:16:21,  7.67s/it]


訓練次數180，總回報19.268794326241114


  4%|█▍                                    | 190/5000 [30:17<8:36:04,  6.44s/it]


訓練次數190，總回報51.282389937106835


  4%|█▍                                   | 200/5000 [31:32<12:09:04,  9.11s/it]


訓練次數200，總回報11.761245674740504


  4%|█▌                                   | 210/5000 [33:03<10:09:46,  7.64s/it]


訓練次數210，總回報8.129032258064534


  4%|█▋                                   | 220/5000 [34:33<10:05:29,  7.60s/it]


訓練次數220，總回報6.129315960912066


  5%|█▋                                   | 230/5000 [35:57<14:32:56, 10.98s/it]


訓練次數230，總回報-3.4708860759493714


  5%|█▊                                    | 240/5000 [37:02<7:55:35,  5.99s/it]


訓練次數240，總回報-2.997278911564632


  5%|█▉                                    | 250/5000 [37:40<5:15:38,  3.99s/it]


訓練次數250，總回報0.2800738007380166


  5%|█▉                                    | 260/5000 [38:22<6:20:35,  4.82s/it]


訓練次數260，總回報-0.6064935064935085


  5%|██                                    | 270/5000 [39:08<7:26:43,  5.67s/it]


訓練次數270，總回報-2.806993006993012


  6%|██▏                                   | 280/5000 [39:49<5:28:57,  4.18s/it]


訓練次數280，總回報-0.23154121863799215


  6%|██▏                                   | 290/5000 [40:48<8:58:28,  6.86s/it]


訓練次數290，總回報2.2209302325581395


  6%|██▏                                  | 300/5000 [41:51<14:01:26, 10.74s/it]


訓練次數300，總回報-94.99999999999903


  6%|██▎                                  | 310/5000 [42:56<10:44:47,  8.25s/it]


訓練次數310，總回報23.599999999999973


  6%|██▍                                   | 320/5000 [43:45<7:11:03,  5.53s/it]


訓練次數320，總回報19.569750889679696


  7%|██▌                                   | 330/5000 [44:30<6:50:38,  5.28s/it]


訓練次數330，總回報13.733333333333347


  7%|██▌                                   | 340/5000 [45:18<6:04:47,  4.70s/it]


訓練次數340，總回報20.761490683229795


  7%|██▋                                   | 350/5000 [46:14<8:14:37,  6.38s/it]


訓練次數350，總回報11.75189873417723


  7%|██▋                                  | 360/5000 [47:34<10:31:52,  8.17s/it]


訓練次數360，總回報45.93706293706284


  7%|██▋                                  | 370/5000 [48:51<11:39:55,  9.07s/it]


訓練次數370，總回報58.20588235294104


  8%|██▊                                  | 380/5000 [50:15<12:18:56,  9.60s/it]


訓練次數380，總回報58.28477508650503


  8%|██▉                                   | 390/5000 [51:06<6:36:15,  5.16s/it]


訓練次數390，總回報48.371841155234584


  8%|███                                   | 400/5000 [52:10<9:18:24,  7.28s/it]


訓練次數400，總回報23.444378698224835


  8%|███                                   | 410/5000 [53:01<7:56:59,  6.24s/it]


訓練次數410，總回報43.347058823529345


  8%|███▏                                  | 420/5000 [53:53<6:54:45,  5.43s/it]


訓練次數420，總回報40.25185185185178


  9%|███▎                                  | 430/5000 [54:51<8:21:23,  6.58s/it]


訓練次數430，總回報35.123322683706014


  9%|███▎                                  | 440/5000 [55:37<6:27:32,  5.10s/it]


訓練次數440，總回報39.59489051094884


  9%|███▍                                  | 450/5000 [56:47<9:10:38,  7.26s/it]


訓練次數450，總回報33.267796610169434


  9%|███▍                                  | 460/5000 [57:51<8:10:06,  6.48s/it]


訓練次數460，總回報48.845182724252346


  9%|███▍                                 | 470/5000 [59:08<13:02:35, 10.37s/it]


訓練次數470，總回報77.98358208955231


 10%|███▍                                | 480/5000 [1:00:07<9:33:55,  7.62s/it]


訓練次數480，總回報119.407473309609


 10%|███▍                               | 490/5000 [1:01:16<13:03:17, 10.42s/it]


訓練次數490，總回報54.75938566552892


 10%|███▌                               | 500/5000 [1:02:40<12:32:51, 10.04s/it]


訓練次數500，總回報128.57703180212056


 10%|███▋                                | 510/5000 [1:03:39<7:06:35,  5.70s/it]


訓練次數510，總回報49.39426751592347


 10%|███▋                                | 520/5000 [1:04:42<8:57:54,  7.20s/it]


訓練次數520，總回報58.77692307692296


 11%|███▊                                | 530/5000 [1:05:30<6:05:50,  4.91s/it]


訓練次數530，總回報58.87407407407393


 11%|███▉                                | 540/5000 [1:06:16<6:11:01,  4.99s/it]


訓練次數540，總回報30.66710963455146


 11%|███▊                               | 550/5000 [1:07:31<11:00:37,  8.91s/it]


訓練次數550，總回報111.08411552346602


 11%|███▉                               | 560/5000 [1:09:01<14:32:41, 11.79s/it]


訓練次數560，總回報118.92578397212586


 11%|████                                | 570/5000 [1:10:20<9:17:14,  7.55s/it]


訓練次數570，總回報37.545255474452496


 12%|████                               | 580/5000 [1:12:27<17:23:02, 14.16s/it]


訓練次數580，總回報149.18333333333416


 12%|████▏                               | 590/5000 [1:13:22<6:12:01,  5.06s/it]


訓練次數590，總回報21.605405405405385


 12%|████▎                               | 600/5000 [1:14:23<7:53:08,  6.45s/it]


訓練次數600，總回報51.617391304347734


 12%|████▍                               | 610/5000 [1:15:12<6:02:18,  4.95s/it]


訓練次數610，總回報31.7094339622641


 12%|████▎                              | 620/5000 [1:16:24<12:19:50, 10.13s/it]


訓練次數620，總回報75.62560553633217


 13%|████▌                               | 630/5000 [1:17:26<9:12:57,  7.59s/it]


訓練次數630，總回報139.1745519713268


 13%|████▌                               | 640/5000 [1:18:15<7:02:05,  5.81s/it]


訓練次數640，總回報40.68013245033103


 13%|████▌                              | 650/5000 [1:19:45<12:35:04, 10.41s/it]


訓練次數650，總回報76.78421052631586


 13%|████▌                              | 660/5000 [1:21:13<11:07:31,  9.23s/it]


訓練次數660，總回報71.64259927797833


 13%|████▋                              | 670/5000 [1:22:36<14:52:05, 12.36s/it]


訓練次數670，總回報49.80588235294109


 14%|████▉                               | 680/5000 [1:23:43<9:21:35,  7.80s/it]


訓練次數680，總回報61.38888888888876


 14%|████▊                              | 690/5000 [1:25:35<18:26:00, 15.40s/it]


訓練次數690，總回報196.7668621700902


 14%|█████                               | 700/5000 [1:26:48<8:44:23,  7.32s/it]


訓練次數700，總回報61.82456747404835


 14%|████▉                              | 710/5000 [1:28:45<16:49:32, 14.12s/it]


訓練次數710，總回報224.3571428571449


 14%|█████                              | 720/5000 [1:30:02<12:06:25, 10.18s/it]


訓練次數720，總回報46.19324758842434


 15%|█████▎                              | 730/5000 [1:30:47<6:05:17,  5.13s/it]


訓練次數730，總回報42.53193916349802


 15%|█████▎                              | 740/5000 [1:31:37<5:58:46,  5.05s/it]


訓練次數740，總回報35.39498207885299


 15%|█████▎                             | 750/5000 [1:32:59<11:49:21, 10.01s/it]


訓練次數750，總回報145.13287671232936


 15%|█████▎                             | 760/5000 [1:34:02<10:08:22,  8.61s/it]


訓練次數760，總回報152.26529968454346


 15%|█████▌                              | 770/5000 [1:35:08<8:24:27,  7.16s/it]


訓練次數770，總回報72.24208754208757


 16%|█████▍                             | 780/5000 [1:36:19<10:43:51,  9.15s/it]


訓練次數780，總回報135.1342465753429


 16%|█████▌                             | 790/5000 [1:38:05<13:48:57, 11.81s/it]


訓練次數790，總回報118.30547945205508


 16%|█████▌                             | 800/5000 [1:39:43<14:47:31, 12.68s/it]


訓練次數800，總回報142.71724137931085


 16%|█████▋                             | 810/5000 [1:41:20<10:55:32,  9.39s/it]


訓練次數810，總回報145.7000000000005


 16%|█████▋                             | 820/5000 [1:42:53<14:52:35, 12.81s/it]


訓練次數820，總回報207.52767295597639


 17%|█████▊                             | 830/5000 [1:44:51<13:28:33, 11.63s/it]


訓練次數830，總回報315.86177474402666


 17%|██████                              | 840/5000 [1:45:47<7:51:54,  6.81s/it]


訓練次數840，總回報109.15129151291542


 17%|██████                              | 850/5000 [1:46:47<7:33:29,  6.56s/it]


訓練次數850，總回報176.2510703363926


 17%|██████▏                             | 860/5000 [1:47:45<7:05:18,  6.16s/it]


訓練次數860，總回報50.95789473684203


 17%|██████                             | 870/5000 [1:49:00<10:04:35,  8.78s/it]


訓練次數870，總回報84.33687943262427


 18%|██████▎                             | 880/5000 [1:50:26<8:43:05,  7.62s/it]


訓練次數880，總回報49.02509363295872


 18%|██████▏                            | 890/5000 [1:52:03<10:16:24,  9.00s/it]


訓練次數890，總回報81.11010101010105


 18%|██████▎                            | 900/5000 [1:53:44<11:52:11, 10.42s/it]


訓練次數900，總回報87.42341137123762


 18%|██████▎                            | 910/5000 [1:55:10<10:09:27,  8.94s/it]


訓練次數910，總回報97.36486486486496


 18%|██████▍                            | 920/5000 [1:56:58<13:14:03, 11.68s/it]


訓練次數920，總回報416.190476190474


 19%|██████▋                             | 930/5000 [1:58:22<9:44:49,  8.62s/it]


訓練次數930，總回報205.54426229508312


 19%|██████▊                             | 940/5000 [1:59:20<6:07:24,  5.43s/it]


訓練次數940，總回報104.72360248447228


 19%|██████▋                            | 950/5000 [2:00:49<13:50:44, 12.31s/it]


訓練次數950，總回報148.73287671232922


 19%|██████▋                            | 960/5000 [2:02:33<14:46:25, 13.16s/it]


訓練次數960，總回報130.34237288135628


 19%|██████▉                             | 970/5000 [2:03:56<9:39:31,  8.63s/it]


訓練次數970，總回報349.85825242718306


 20%|██████▊                            | 980/5000 [2:05:32<13:34:52, 12.16s/it]


訓練次數980，總回報136.9228739002938


 20%|███████▏                            | 990/5000 [2:06:46<8:52:45,  7.97s/it]


訓練次數990，總回報101.70536912751706


 20%|███████                            | 1000/5000 [2:07:54<7:22:53,  6.64s/it]


訓練次數1000，總回報30.50396039603956


 20%|██████▊                           | 1010/5000 [2:09:18<12:46:00, 11.52s/it]


訓練次數1010，總回報260.6691275167795


 20%|███████▏                           | 1020/5000 [2:10:37<7:07:34,  6.45s/it]


訓練次數1020，總回報113.34823151125428


 21%|███████                           | 1030/5000 [2:12:20<13:48:03, 12.51s/it]


訓練次數1030，總回報115.04210526315822


 21%|███████                           | 1040/5000 [2:13:36<10:30:37,  9.55s/it]


訓練次數1040，總回報71.39271523178809


 21%|███████▏                          | 1050/5000 [2:15:22<11:20:14, 10.33s/it]


訓練次數1050，總回報39.004950495049435


 21%|███████▍                           | 1060/5000 [2:16:26<9:19:14,  8.52s/it]


訓練次數1060，總回報55.75938566552893


 21%|███████▍                           | 1070/5000 [2:17:26<9:24:32,  8.62s/it]


訓練次數1070，總回報332.8586466165402


 22%|███████▎                          | 1080/5000 [2:18:44<10:53:17, 10.00s/it]


訓練次數1080，總回報266.0470198675503


 22%|███████▋                           | 1090/5000 [2:20:09<9:32:40,  8.79s/it]


訓練次數1090，總回報151.23130699088225


 22%|███████▍                          | 1100/5000 [2:21:57<13:22:07, 12.34s/it]


訓練次數1100，總回報278.85483870967767


 22%|███████▊                           | 1110/5000 [2:23:17<9:22:11,  8.67s/it]


訓練次數1110，總回報317.7890410958899


 22%|███████▌                          | 1120/5000 [2:24:34<13:01:55, 12.09s/it]


訓練次數1120，總回報262.3465587044544


 23%|███████▋                          | 1130/5000 [2:25:58<12:07:39, 11.28s/it]


訓練次數1130，總回報77.98350515463925


 23%|███████▉                           | 1140/5000 [2:27:13<7:54:36,  7.38s/it]


訓練次數1140，總回報170.2000000000008


 23%|████████                           | 1150/5000 [2:28:26<9:20:00,  8.73s/it]


訓練次數1150，總回報181.45032679738645


 23%|████████                           | 1160/5000 [2:29:41<9:25:25,  8.83s/it]


訓練次數1160，總回報247.61886792452958


 23%|███████▉                          | 1170/5000 [2:31:00<10:53:27, 10.24s/it]


訓練次數1170，總回報370.656249999999


 24%|████████▎                          | 1180/5000 [2:32:25<8:26:37,  7.96s/it]


訓練次數1180，總回報49.20149253731338


 24%|████████                          | 1190/5000 [2:33:51<10:29:06,  9.91s/it]


訓練次數1190，總回報244.72514970060064


 24%|████████▏                         | 1200/5000 [2:35:16<13:39:55, 12.95s/it]


訓練次數1200，總回報495.6423357664202


 24%|████████▍                          | 1210/5000 [2:36:17<7:31:23,  7.15s/it]


訓練次數1210，總回報29.644776119402955


 24%|████████▌                          | 1220/5000 [2:37:32<9:00:50,  8.58s/it]


訓練次數1220，總回報238.09619377162704


 25%|████████▌                          | 1230/5000 [2:38:43<8:50:54,  8.45s/it]


訓練次數1230，總回報315.42034632034597


 25%|████████▍                         | 1240/5000 [2:39:47<11:09:21, 10.68s/it]


訓練次數1240，總回報37.51904761904757


 25%|████████▌                         | 1250/5000 [2:41:49<15:47:29, 15.16s/it]


訓練次數1250，總回報95.88278145695382


 25%|████████▌                         | 1260/5000 [2:43:22<12:02:04, 11.58s/it]


訓練次數1260，總回報105.30536398467441


 25%|████████▋                         | 1270/5000 [2:44:49<12:31:58, 12.10s/it]


訓練次數1270，總回報219.92413793103518


 26%|████████▋                         | 1280/5000 [2:46:19<13:17:33, 12.86s/it]


訓練次數1280，總回報302.91605839416115


 26%|█████████                          | 1290/5000 [2:48:00<9:32:18,  9.26s/it]


訓練次數1290，總回報43.749999999999915


 26%|████████▊                         | 1300/5000 [2:49:23<11:01:51, 10.73s/it]


訓練次數1300，總回報238.9942760942777


 26%|████████▉                         | 1310/5000 [2:51:06<12:55:27, 12.61s/it]


訓練次數1310，總回報107.24212454212484


 26%|████████▉                         | 1320/5000 [2:52:54<13:17:26, 13.00s/it]


訓練次數1320，總回報365.3498257839709


 27%|█████████▎                         | 1330/5000 [2:54:19<9:32:57,  9.37s/it]


訓練次數1330，總回報351.5083916083914


 27%|█████████▍                         | 1340/5000 [2:55:29<9:54:28,  9.75s/it]


訓練次數1340，總回報159.9455830388697


 27%|█████████▏                        | 1350/5000 [2:57:18<11:49:47, 11.67s/it]


訓練次數1350，總回報515.5711743772193


 27%|█████████▌                         | 1360/5000 [2:58:35<8:12:01,  8.11s/it]


訓練次數1360，總回報322.19565217391283


 27%|█████████▎                        | 1370/5000 [3:00:10<13:27:06, 13.34s/it]


訓練次數1370，總回報399.9363636363611


 28%|█████████▍                        | 1380/5000 [3:02:05<12:55:25, 12.85s/it]


訓練次數1380，總回報568.7372262773677


 28%|█████████▍                        | 1390/5000 [3:04:04<14:23:01, 14.34s/it]


訓練次數1390，總回報392.6974358974344


 28%|█████████▌                        | 1400/5000 [3:05:38<10:13:46, 10.23s/it]


訓練次數1400，總回報440.99999999999824


 28%|█████████▌                        | 1410/5000 [3:07:40<15:55:35, 15.97s/it]


訓練次數1410，總回報364.6032258064506


 28%|█████████▋                        | 1420/5000 [3:09:12<13:35:40, 13.67s/it]


訓練次數1420，總回報479.1679389312949


 29%|█████████▋                        | 1430/5000 [3:11:05<11:40:14, 11.77s/it]


訓練次數1430，總回報358.8962406015023


 29%|█████████▊                        | 1440/5000 [3:13:02<16:00:50, 16.19s/it]


訓練次數1440，總回報720.1602605863065


 29%|█████████▊                        | 1450/5000 [3:14:19<10:55:28, 11.08s/it]


訓練次數1450，總回報344.02051282051275


 29%|█████████▉                        | 1460/5000 [3:15:54<10:45:22, 10.94s/it]


訓練次數1460，總回報400.8076923076907


 29%|█████████▉                        | 1470/5000 [3:17:50<12:00:45, 12.25s/it]


訓練次數1470，總回報288.29163498098916


 30%|██████████                        | 1480/5000 [3:19:46<15:00:16, 15.35s/it]


訓練次數1480，總回報121.19774436090259


 30%|██████████▏                       | 1490/5000 [3:21:40<14:25:25, 14.79s/it]


訓練次數1490，總回報516.5243243243209


 30%|██████████▏                       | 1500/5000 [3:23:28<14:00:40, 14.41s/it]


訓練次數1500，總回報306.32608695652186


 30%|██████████▎                       | 1510/5000 [3:25:34<13:29:27, 13.92s/it]


訓練次數1510，總回報288.4547703180216


 30%|██████████▎                       | 1520/5000 [3:27:05<10:01:16, 10.37s/it]


訓練次數1520，總回報263.7695906432757


 31%|██████████▋                        | 1530/5000 [3:28:38<9:53:01, 10.25s/it]


訓練次數1530，總回報194.70248447205077


 31%|██████████▍                       | 1540/5000 [3:30:34<13:25:45, 13.97s/it]


訓練次數1540，總回報864.7201342281733


 31%|██████████▌                       | 1550/5000 [3:32:19<13:06:47, 13.68s/it]


訓練次數1550，總回報338.9499999999995


 31%|██████████▌                       | 1560/5000 [3:34:13<14:23:39, 15.06s/it]


訓練次數1560，總回報264.7245033112591


 31%|██████████▉                        | 1570/5000 [3:35:42<9:44:07, 10.22s/it]


訓練次數1570，總回報264.16907216494883


 32%|██████████▋                       | 1580/5000 [3:37:31<11:04:27, 11.66s/it]


訓練次數1580，總回報265.95454545454584


 32%|██████████▊                       | 1590/5000 [3:38:50<10:13:15, 10.79s/it]


訓練次數1590，總回報318.4595317725756


 32%|██████████▉                       | 1600/5000 [3:40:42<12:49:43, 13.58s/it]


訓練次數1600，總回報170.06666666666712


 32%|███████████▎                       | 1610/5000 [3:42:04<8:26:41,  8.97s/it]


訓練次數1610，總回報359.32727272727215


 32%|███████████▎                       | 1620/5000 [3:43:19<8:01:56,  8.56s/it]


訓練次數1620，總回報309.6


 33%|███████████▍                       | 1630/5000 [3:44:39<8:07:17,  8.68s/it]


訓練次數1630，總回報129.12539682539725


 33%|███████████▍                       | 1640/5000 [3:46:03<8:47:51,  9.43s/it]


訓練次數1640，總回報186.6532710280383


 33%|███████████▏                      | 1650/5000 [3:47:47<10:42:12, 11.50s/it]


訓練次數1650，總回報222.27785234899397


 33%|███████████▌                       | 1660/5000 [3:49:10<9:51:13, 10.62s/it]


訓練次數1660，總回報412.8333333333319


 33%|███████████▎                      | 1670/5000 [3:51:09<11:52:38, 12.84s/it]


訓練次數1670，總回報465.5709342560534


 34%|███████████▊                       | 1680/5000 [3:52:47<9:34:32, 10.38s/it]


訓練次數1680，總回報227.05925925926053


 34%|███████████▊                       | 1690/5000 [3:54:00<7:15:53,  7.90s/it]


訓練次數1690，總回報107.71359223301


 34%|███████████▌                      | 1700/5000 [3:55:32<10:19:11, 11.26s/it]


訓練次數1700，總回報618.0217522658506


 34%|███████████▉                       | 1710/5000 [3:56:50<9:57:08, 10.89s/it]


訓練次數1710，總回報304.2294964028777


 34%|████████████                       | 1720/5000 [3:58:09<9:27:30, 10.38s/it]


訓練次數1720，總回報393.10849673202455


 35%|███████████▊                      | 1730/5000 [3:59:43<11:02:02, 12.15s/it]


訓練次數1730，總回報291.1604938271614


 35%|███████████▊                      | 1740/5000 [4:01:24<10:33:57, 11.67s/it]


訓練次數1740，總回報624.6999999999941


 35%|███████████▉                      | 1750/5000 [4:02:56<10:10:21, 11.27s/it]


訓練次數1750，總回報393.9622641509413


 35%|███████████▉                      | 1760/5000 [4:05:07<14:43:45, 16.37s/it]


訓練次數1760，總回報-94.99999999999903


 35%|████████████                      | 1770/5000 [4:06:35<10:03:53, 11.22s/it]


訓練次數1770，總回報682.4191489361638


 36%|████████████                      | 1780/5000 [4:08:05<10:00:26, 11.19s/it]


訓練次數1780，總回報404.65342960288564


 36%|████████████▌                      | 1790/5000 [4:09:38<9:39:28, 10.83s/it]


訓練次數1790，總回報328.91228070175316


 36%|████████████▏                     | 1800/5000 [4:11:40<12:34:53, 14.15s/it]


訓練次數1800，總回報335.347040498442


 36%|████████████▋                      | 1810/5000 [4:12:51<8:13:37,  9.28s/it]


訓練次數1810，總回報284.2376811594212


 36%|████████████▋                      | 1820/5000 [4:14:34<9:18:50, 10.54s/it]


訓練次數1820，總回報352.9282229965147


 37%|████████████▍                     | 1830/5000 [4:16:08<11:01:06, 12.51s/it]


訓練次數1830，總回報851.1103678929617


 37%|████████████▌                     | 1840/5000 [4:18:13<14:05:38, 16.06s/it]


訓練次數1840，總回報749.9295081967132


 37%|████████████▌                     | 1850/5000 [4:19:57<10:10:16, 11.62s/it]


訓練次數1850，總回報125.43333333333378


 37%|████████████▋                     | 1860/5000 [4:21:54<10:33:29, 12.11s/it]


訓練次數1860，總回報411.29681978798516


 37%|████████████▋                     | 1870/5000 [4:24:10<12:15:03, 14.09s/it]


訓練次數1870，總回報52.12978723404247


 38%|████████████▊                     | 1880/5000 [4:25:55<10:57:58, 12.65s/it]


訓練次數1880，總回報289.7604938271615


 38%|█████████████▏                     | 1890/5000 [4:27:29<9:16:54, 10.74s/it]


訓練次數1890，總回報290.5686274509803


 38%|████████████▉                     | 1900/5000 [4:29:19<10:38:08, 12.35s/it]


訓練次數1900，總回報402.29303135888307


 38%|█████████████▎                     | 1910/5000 [4:30:44<9:00:35, 10.50s/it]


訓練次數1910，總回報578.3935251798498


 38%|█████████████                     | 1920/5000 [4:32:43<11:45:14, 13.74s/it]


訓練次數1920，總回報448.1606557377021


 39%|█████████████                     | 1930/5000 [4:34:14<10:53:31, 12.77s/it]


訓練次數1930，總回報308.1479233226838


 39%|█████████████▌                     | 1940/5000 [4:35:40<8:58:49, 10.57s/it]


訓練次數1940，總回報513.1905723905666


 39%|█████████████▎                    | 1950/5000 [4:37:19<10:38:49, 12.57s/it]


訓練次數1950，總回報439.3924528301852


 39%|█████████████▎                    | 1960/5000 [4:39:16<10:27:23, 12.38s/it]


訓練次數1960，總回報518.581818181816


 39%|█████████████▊                     | 1970/5000 [4:40:39<6:13:54,  7.40s/it]


訓練次數1970，總回報96.60310077519395


 40%|█████████████▍                    | 1980/5000 [4:42:17<10:04:45, 12.01s/it]


訓練次數1980，總回報436.1688581314867


 40%|█████████████▌                    | 1990/5000 [4:44:12<11:42:33, 14.00s/it]


訓練次數1990，總回報854.6293286218955


 40%|█████████████▌                    | 2000/5000 [4:46:14<13:29:02, 16.18s/it]


訓練次數2000，總回報871.9428571428427


 40%|█████████████▋                    | 2010/5000 [4:48:16<12:33:03, 15.11s/it]


訓練次數2010，總回報486.1031802120128


 40%|██████████████▏                    | 2020/5000 [4:49:59<9:17:38, 11.23s/it]


訓練次數2020，總回報581.479310344821


 41%|█████████████▊                    | 2030/5000 [4:51:56<12:40:18, 15.36s/it]


訓練次數2030，總回報536.3978494623616


 41%|█████████████▊                    | 2040/5000 [4:53:49<11:35:29, 14.10s/it]


訓練次數2040，總回報352.62200647249057


 41%|██████████████▎                    | 2050/5000 [4:55:19<7:42:13,  9.40s/it]


訓練次數2050，總回報242.72727272727352


 41%|██████████████                    | 2060/5000 [4:56:42<10:19:39, 12.65s/it]


訓練次數2060，總回報866.8351916376198


 41%|██████████████▍                    | 2070/5000 [4:57:57<8:18:47, 10.21s/it]


訓練次數2070，總回報293.41355932203385


 42%|██████████████▌                    | 2080/5000 [4:59:18<9:14:23, 11.39s/it]


訓練次數2080，總回報495.52454212453995


 42%|██████████████▏                   | 2090/5000 [5:01:02<10:25:49, 12.90s/it]


訓練次數2090，總回報144.37773851590134


 42%|██████████████▎                   | 2100/5000 [5:02:36<10:01:21, 12.44s/it]


訓練次數2100，總回報-94.99999999999905


 42%|██████████████▊                    | 2110/5000 [5:04:08<8:24:24, 10.47s/it]


訓練次數2110，總回報536.6729729729686


 42%|██████████████▍                   | 2120/5000 [5:06:01<10:09:54, 12.71s/it]


訓練次數2120，總回報497.02176870748076


 43%|██████████████▉                    | 2130/5000 [5:07:33<8:20:05, 10.45s/it]


訓練次數2130，總回報196.01538461538524


 43%|██████████████▉                    | 2140/5000 [5:09:18<6:50:57,  8.62s/it]


訓練次數2140，總回報62.4424657534246


 43%|██████████████▌                   | 2150/5000 [5:11:05<10:27:56, 13.22s/it]


訓練次數2150，總回報495.0950166112903


 43%|██████████████▋                   | 2160/5000 [5:13:03<12:47:26, 16.21s/it]


訓練次數2160，總回報454.61821305841727


 43%|██████████████▊                   | 2170/5000 [5:15:16<12:08:28, 15.44s/it]


訓練次數2170，總回報677.8620938628122


 44%|███████████████▎                   | 2180/5000 [5:17:06<9:39:19, 12.33s/it]


訓練次數2180，總回報253.3457627118653


 44%|███████████████▎                   | 2190/5000 [5:18:28<8:34:52, 10.99s/it]


訓練次數2190，總回報-94.999999999999


 44%|███████████████▍                   | 2200/5000 [5:19:57<7:09:15,  9.20s/it]


訓練次數2200，總回報256.02116788321314


 44%|███████████████▍                   | 2210/5000 [5:21:00<5:56:15,  7.66s/it]


訓練次數2210，總回報437.4201550387587


 44%|███████████████                   | 2220/5000 [5:23:09<11:55:28, 15.44s/it]


訓練次數2220，總回報430.62447552447446


 45%|███████████████▏                  | 2230/5000 [5:24:46<11:12:05, 14.56s/it]


訓練次數2230，總回報910.4824561403366


 45%|███████████████▏                  | 2240/5000 [5:26:46<11:41:53, 15.26s/it]


訓練次數2240，總回報860.7311258278062


 45%|███████████████▊                   | 2250/5000 [5:28:04<7:45:23, 10.15s/it]


訓練次數2250，總回報48.210526315789416


 45%|███████████████▊                   | 2260/5000 [5:29:57<9:42:17, 12.75s/it]


訓練次數2260，總回報521.9637681159397


 45%|███████████████▉                   | 2270/5000 [5:31:30<6:00:06,  7.91s/it]


訓練次數2270，總回報256.5712328767128


 46%|███████████████▉                   | 2280/5000 [5:33:12<9:24:00, 12.44s/it]


訓練次數2280，總回報235.2918918918931


 46%|███████████████▌                  | 2290/5000 [5:35:20<12:49:14, 17.03s/it]


訓練次數2290，總回報915.506993006981


 46%|████████████████                   | 2300/5000 [5:37:11<8:55:56, 11.91s/it]


訓練次數2300，總回報84.29510489510501


 46%|████████████████▏                  | 2310/5000 [5:39:18<9:25:19, 12.61s/it]


訓練次數2310，總回報136.47560137457086


 46%|████████████████▏                  | 2320/5000 [5:41:13<9:08:27, 12.28s/it]


訓練次數2320，總回報519.0788395904377


 47%|███████████████▊                  | 2330/5000 [5:43:04<10:08:26, 13.67s/it]


訓練次數2330，總回報46.947670250895996


 47%|████████████████▍                  | 2340/5000 [5:44:57<9:21:33, 12.67s/it]


訓練次數2340，總回報410.47176079733975


 47%|███████████████▉                  | 2350/5000 [5:46:47<10:53:51, 14.80s/it]


訓練次數2350，總回報101.28649517684899


 47%|████████████████▌                  | 2360/5000 [5:48:39<8:51:21, 12.08s/it]


訓練次數2360，總回報553.0886075949309


 47%|████████████████▌                  | 2370/5000 [5:50:13<8:11:16, 11.21s/it]


訓練次數2370，總回報325.64509803921567


 48%|████████████████▋                  | 2380/5000 [5:51:58<9:16:50, 12.75s/it]


訓練次數2380，總回報90.44163568773246


 48%|████████████████▋                  | 2390/5000 [5:53:24<4:46:12,  6.58s/it]


訓練次數2390，總回報39.44423676012456


 48%|████████████████▊                  | 2400/5000 [5:55:18<9:18:40, 12.89s/it]


訓練次數2400，總回報369.177419354838


 48%|████████████████▊                  | 2410/5000 [5:56:52<8:00:24, 11.13s/it]


訓練次數2410，總回報550.9660516605136


 48%|████████████████▉                  | 2420/5000 [5:58:33<7:36:28, 10.62s/it]


訓練次數2420，總回報49.60287539936095


 49%|█████████████████                  | 2430/5000 [6:00:35<9:11:50, 12.88s/it]


訓練次數2430，總回報533.6587458745848


 49%|█████████████████                  | 2440/5000 [6:02:30<8:04:49, 11.36s/it]


訓練次數2440，總回報164.5197183098597


 49%|█████████████████▏                 | 2450/5000 [6:04:04<8:10:24, 11.54s/it]


訓練次數2450，總回報345.7999999999993


 49%|█████████████████▏                 | 2460/5000 [6:05:57<9:55:09, 14.06s/it]


訓練次數2460，總回報275.80191082802594


 49%|█████████████████▎                 | 2470/5000 [6:07:25<8:31:41, 12.14s/it]


訓練次數2470，總回報334.2567901234571


 50%|████████████████▊                 | 2480/5000 [6:09:45<12:59:03, 18.55s/it]


訓練次數2480，總回報914.4536231883994


 50%|████████████████▉                 | 2490/5000 [6:11:50<10:32:48, 15.13s/it]


訓練次數2490，總回報793.1243816254322


 50%|█████████████████▌                 | 2500/5000 [6:13:57<9:53:45, 14.25s/it]


訓練次數2500，總回報43.87246376811587


 50%|█████████████████▌                 | 2510/5000 [6:15:36<9:10:24, 13.26s/it]


訓練次數2510，總回報755.5747126436682


 50%|█████████████████▋                 | 2520/5000 [6:17:24<8:03:38, 11.70s/it]


訓練次數2520，總回報534.8060070671352


 51%|█████████████████▋                 | 2530/5000 [6:19:04<7:02:22, 10.26s/it]


訓練次數2530，總回報94.65423728813562


 51%|█████████████████▎                | 2540/5000 [6:21:16<12:49:19, 18.76s/it]


訓練次數2540，總回報567.4292358803921


 51%|█████████████████▊                 | 2550/5000 [6:23:10<7:46:45, 11.43s/it]


訓練次數2550，總回報121.8255319148938


 51%|█████████████████▉                 | 2560/5000 [6:25:07<9:47:37, 14.45s/it]


訓練次數2560，總回報316.4638297872344


 51%|█████████████████▉                 | 2570/5000 [6:26:43<7:04:31, 10.48s/it]


訓練次數2570，總回報891.9299212598366


 52%|██████████████████                 | 2580/5000 [6:28:34<9:10:51, 13.66s/it]


訓練次數2580，總回報340.1623052959499


 52%|██████████████████▏                | 2590/5000 [6:30:21<8:09:00, 12.17s/it]


訓練次數2590，總回報148.53932584269694


 52%|██████████████████▏                | 2600/5000 [6:31:59<7:26:27, 11.16s/it]


訓練次數2600，總回報365.3947368421036


 52%|██████████████████▎                | 2610/5000 [6:34:02<9:55:56, 14.96s/it]


訓練次數2610，總回報597.6792387543223


 52%|█████████████████▊                | 2620/5000 [6:35:59<10:00:22, 15.14s/it]


訓練次數2620，總回報359.3631205673755


 53%|██████████████████▍                | 2630/5000 [6:37:29<7:25:16, 11.27s/it]


訓練次數2630，總回報383.7670846394973


 53%|██████████████████▍                | 2640/5000 [6:39:25<9:06:52, 13.90s/it]


訓練次數2640，總回報107.97647058823544


 53%|██████████████████▌                | 2650/5000 [6:41:02<6:46:03, 10.37s/it]


訓練次數2650，總回報100.6278145695367


 53%|██████████████████▌                | 2660/5000 [6:43:02<8:49:13, 13.57s/it]


訓練次數2660，總回報751.9764705882267


 53%|██████████████████▋                | 2670/5000 [6:44:50<8:12:53, 12.69s/it]


訓練次數2670，總回報526.4423357664208


 54%|██████████████████▊                | 2680/5000 [6:46:23<9:16:33, 14.39s/it]


訓練次數2680，總回報414.95988023951656


 54%|██████████████████▎               | 2690/5000 [6:48:47<12:34:48, 19.61s/it]


訓練次數2690，總回報868.4525423728712


 54%|██████████████████▉                | 2700/5000 [6:50:38<9:50:04, 15.39s/it]


訓練次數2700，總回報599.6030303030262


 54%|██████████████████▉                | 2710/5000 [6:52:18<8:33:55, 13.47s/it]


訓練次數2710，總回報914.7242424242299


 54%|███████████████████                | 2720/5000 [6:54:01<9:29:33, 14.99s/it]


訓練次數2720，總回報900.3925925925793


 55%|███████████████████                | 2730/5000 [6:56:14<9:40:07, 15.33s/it]


訓練次數2730，總回報271.23030303030396


 55%|███████████████████▏               | 2740/5000 [6:58:16<8:48:37, 14.03s/it]


訓練次數2740，總回報315.6241379310337


 55%|██████████████████▋               | 2750/5000 [7:00:40<12:14:09, 19.58s/it]


訓練次數2750，總回報900.6210526315645


 55%|███████████████████▎               | 2760/5000 [7:02:41<9:07:14, 14.66s/it]


訓練次數2760，總回報540.5944444444395


 55%|███████████████████▍               | 2770/5000 [7:04:03<6:51:39, 11.08s/it]


訓練次數2770，總回報305.30034602076137


 56%|███████████████████▍               | 2780/5000 [7:06:17<8:33:40, 13.88s/it]


訓練次數2780，總回報562.2620462046171


 56%|██████████████████▉               | 2790/5000 [7:08:24<10:20:38, 16.85s/it]


訓練次數2790，總回報911.6551020408036


 56%|███████████████████▌               | 2800/5000 [7:10:36<9:36:15, 15.72s/it]


訓練次數2800，總回報903.1271477663071


 56%|███████████████████▋               | 2810/5000 [7:12:37<9:24:53, 15.48s/it]


訓練次數2810，總回報101.58617886178877


 56%|███████████████████▋               | 2820/5000 [7:14:42<9:04:59, 15.00s/it]


訓練次數2820，總回報113.86713286713305


 57%|███████████████████▊               | 2830/5000 [7:16:32<6:21:14, 10.54s/it]


訓練次數2830，總回報146.41716171617216


 57%|███████████████████▉               | 2840/5000 [7:18:16<5:36:50,  9.36s/it]


訓練次數2840，總回報99.40247933884311


 57%|███████████████████▉               | 2850/5000 [7:20:05<8:06:10, 13.57s/it]


訓練次數2850，總回報641.5142857142787


 57%|████████████████████               | 2860/5000 [7:21:28<4:40:09,  7.85s/it]


訓練次數2860，總回報52.43333333333327


 57%|████████████████████               | 2870/5000 [7:23:39<8:35:05, 14.51s/it]


訓練次數2870，總回報738.9607508532315


 58%|████████████████████▏              | 2880/5000 [7:25:06<4:48:24,  8.16s/it]


訓練次數2880，總回報388.87142857142703


 58%|████████████████████▏              | 2890/5000 [7:26:47<8:23:46, 14.33s/it]


訓練次數2890，總回報917.7032786885103


 58%|████████████████████▎              | 2900/5000 [7:28:48<7:55:21, 13.58s/it]


訓練次數2900，總回報369.63948220064503


 58%|███████████████████▊              | 2910/5000 [7:30:40<10:20:54, 17.82s/it]


訓練次數2910，總回報895.0662251655535


 58%|███████████████████▊              | 2920/5000 [7:32:53<10:29:20, 18.15s/it]


訓練次數2920，總回報910.4069930069799


 59%|████████████████████▌              | 2930/5000 [7:34:32<6:02:58, 10.52s/it]


訓練次數2930，總回報559.0210526315733


 59%|████████████████████▌              | 2940/5000 [7:36:10<6:02:37, 10.56s/it]


訓練次數2940，總回報336.6711864406773


 59%|████████████████████▋              | 2950/5000 [7:38:25<9:30:55, 16.71s/it]


訓練次數2950，總回報462.1845117845083


 59%|████████████████████▋              | 2960/5000 [7:40:13<8:31:16, 15.04s/it]


訓練次數2960，總回報903.5972789115567


 59%|████████████████████▊              | 2970/5000 [7:42:23<8:10:19, 14.49s/it]


訓練次數2970，總回報702.4124999999946


 60%|████████████████████▎             | 2980/5000 [7:44:50<10:19:36, 18.40s/it]


訓練次數2980，總回報596.3903225806413


 60%|████████████████████▉              | 2990/5000 [7:46:30<6:47:35, 12.17s/it]


訓練次數2990，總回報408.0888888888876


 60%|█████████████████████              | 3000/5000 [7:48:06<6:54:51, 12.45s/it]


訓練次數3000，總回報82.37845659163992


 60%|█████████████████████              | 3010/5000 [7:50:20<8:34:03, 15.50s/it]


訓練次數3010，總回報882.5055555555406


 60%|█████████████████████▏             | 3020/5000 [7:52:23<7:33:02, 13.73s/it]


訓練次數3020，總回報602.0964980544711


 61%|█████████████████████▏             | 3030/5000 [7:54:20<7:52:26, 14.39s/it]


訓練次數3030，總回報390.22962962962777


 61%|█████████████████████▎             | 3040/5000 [7:56:11<7:35:07, 13.93s/it]


訓練次數3040，總回報902.7555555555379


 61%|█████████████████████▎             | 3050/5000 [7:57:19<3:42:26,  6.84s/it]


訓練次數3050，總回報191.9257861635229


 61%|█████████████████████▍             | 3060/5000 [7:59:24<6:30:05, 12.06s/it]


訓練次數3060，總回報136.26190476190524


 61%|█████████████████████▍             | 3070/5000 [8:01:05<4:18:02,  8.02s/it]


訓練次數3070，總回報46.7018126888217


 62%|█████████████████████▌             | 3080/5000 [8:02:56<7:40:58, 14.41s/it]


訓練次數3080，總回報868.1536423840978


 62%|█████████████████████▋             | 3090/5000 [8:04:39<8:42:00, 16.40s/it]


訓練次數3090，總回報905.897278911557


 62%|█████████████████████▋             | 3100/5000 [8:06:42<8:53:15, 16.84s/it]


訓練次數3100，總回報908.4972789115578


 62%|█████████████████████▊             | 3110/5000 [8:08:50<7:45:59, 14.79s/it]


訓練次數3110，總回報876.7947368420918


 62%|█████████████████████▊             | 3120/5000 [8:10:50<7:48:17, 14.95s/it]


訓練次數3120，總回報551.6573770491763


 63%|█████████████████████▉             | 3130/5000 [8:12:58<8:32:27, 16.44s/it]


訓練次數3130，總回報879.6985507246324


 63%|█████████████████████▉             | 3140/5000 [8:14:52<6:15:17, 12.11s/it]


訓練次數3140，總回報145.85396825396867


 63%|██████████████████████             | 3150/5000 [8:16:42<6:54:28, 13.44s/it]


訓練次數3150，總回報912.3888888888774


 63%|██████████████████████             | 3160/5000 [8:18:42<8:15:37, 16.16s/it]


訓練次數3160，總回報273.5973063973073


 63%|██████████████████████▏            | 3170/5000 [8:20:53<7:26:05, 14.63s/it]


訓練次數3170，總回報-94.99999999999903


 64%|██████████████████████▎            | 3180/5000 [8:23:14<7:22:17, 14.58s/it]


訓練次數3180，總回報753.4666666666604


 64%|██████████████████████▎            | 3190/5000 [8:25:24<8:14:24, 16.39s/it]


訓練次數3190，總回報513.4219512195095


 64%|██████████████████████▍            | 3200/5000 [8:27:02<5:35:12, 11.17s/it]


訓練次數3200，總回報106.0157894736844


 64%|██████████████████████▍            | 3210/5000 [8:28:48<6:23:11, 12.84s/it]


訓練次數3210，總回報774.9015873015821


 64%|██████████████████████▌            | 3220/5000 [8:31:00<8:18:53, 16.82s/it]


訓練次數3220，總回報710.5137254901848


 65%|██████████████████████▌            | 3230/5000 [8:32:52<5:08:23, 10.45s/it]


訓練次數3230，總回報307.0372549019611


 65%|██████████████████████▋            | 3240/5000 [8:35:24<9:03:42, 18.54s/it]


訓練次數3240，總回報720.8292682926715


 65%|██████████████████████▊            | 3250/5000 [8:37:27<6:54:07, 14.20s/it]


訓練次數3250，總回報909.3093632958711


 65%|██████████████████████▊            | 3260/5000 [8:40:07<8:05:44, 16.75s/it]


訓練次數3260，總回報86.82684563758399


 65%|██████████████████████▉            | 3270/5000 [8:41:51<6:37:01, 13.77s/it]


訓練次數3270，總回報404.55867507886103


 66%|██████████████████████▉            | 3280/5000 [8:43:37<7:02:35, 14.74s/it]


訓練次數3280，總回報394.29999999999876


 66%|███████████████████████            | 3290/5000 [8:45:20<5:47:53, 12.21s/it]


訓練次數3290，總回報882.9043795620317


 66%|███████████████████████            | 3300/5000 [8:47:09<7:32:45, 15.98s/it]


訓練次數3300，總回報916.1812030075047


 66%|███████████████████████▏           | 3310/5000 [8:49:16<7:45:50, 16.54s/it]


訓練次數3310，總回報903.3102236421618


 66%|███████████████████████▏           | 3320/5000 [8:51:01<6:24:53, 13.75s/it]


訓練次數3320，總回報135.71698113207572


 67%|███████████████████████▎           | 3330/5000 [8:53:00<8:20:51, 17.99s/it]


訓練次數3330，總回報586.0626506024037


 67%|███████████████████████▍           | 3340/5000 [8:55:14<6:37:03, 14.35s/it]


訓練次數3340，總回報357.2602739726019


 67%|███████████████████████▍           | 3350/5000 [8:57:17<7:16:47, 15.88s/it]


訓練次數3350，總回報824.4947368420934


 67%|███████████████████████▌           | 3360/5000 [8:59:28<7:46:40, 17.07s/it]


訓練次數3360，總回報872.9898734177133


 67%|███████████████████████▌           | 3370/5000 [9:00:56<6:34:56, 14.54s/it]


訓練次數3370，總回報738.1645390070863


 68%|███████████████████████▋           | 3380/5000 [9:03:02<6:33:38, 14.58s/it]


訓練次數3380，總回報910.721052631567


 68%|███████████████████████▋           | 3390/5000 [9:05:19<7:35:29, 16.97s/it]


訓練次數3390，總回報901.1483870967659


 68%|███████████████████████▊           | 3400/5000 [9:07:22<6:57:54, 15.67s/it]


訓練次數3400，總回報602.4966789667857


 68%|███████████████████████▊           | 3410/5000 [9:09:48<8:01:14, 18.16s/it]


訓練次數3410，總回報882.3315436241485


 68%|███████████████████████▉           | 3420/5000 [9:11:44<4:53:04, 11.13s/it]


訓練次數3420，總回報916.7354838709564


 69%|████████████████████████           | 3430/5000 [9:13:54<5:48:47, 13.33s/it]


訓練次數3430，總回報620.20909090908


 69%|████████████████████████           | 3440/5000 [9:15:56<7:08:53, 16.50s/it]


訓練次數3440，總回報414.37142857142453


 69%|████████████████████████▏          | 3450/5000 [9:18:12<7:24:08, 17.19s/it]


訓練次數3450，總回報914.35555555554


 69%|████████████████████████▏          | 3460/5000 [9:20:11<5:34:46, 13.04s/it]


訓練次數3460，總回報891.6468164793943


 69%|████████████████████████▎          | 3470/5000 [9:22:34<7:24:05, 17.42s/it]


訓練次數3470，總回報763.8315614617878


 70%|████████████████████████▎          | 3480/5000 [9:24:12<4:35:50, 10.89s/it]


訓練次數3480，總回報138.02222222222275


 70%|████████████████████████▍          | 3490/5000 [9:26:06<6:08:14, 14.63s/it]


訓練次數3490，總回報917.9076923076803


 70%|████████████████████████▌          | 3500/5000 [9:27:46<4:41:51, 11.27s/it]


訓練次數3500，總回報568.9783783783729


 70%|████████████████████████▌          | 3510/5000 [9:30:05<6:18:10, 15.23s/it]


訓練次數3510，總回報56.36417910447755


 70%|████████████████████████▋          | 3520/5000 [9:32:11<6:43:51, 16.37s/it]


訓練次數3520，總回報351.2857142857123


 71%|████████████████████████▋          | 3530/5000 [9:34:15<6:07:36, 15.00s/it]


訓練次數3530，總回報858.7366548042579


 71%|████████████████████████▊          | 3540/5000 [9:36:33<7:06:55, 17.55s/it]


訓練次數3540，總回報803.3050847457514


 71%|████████████████████████▊          | 3550/5000 [9:38:01<3:20:20,  8.29s/it]


訓練次數3550，總回報89.81229235880407


 71%|████████████████████████▉          | 3560/5000 [9:40:23<7:36:21, 19.02s/it]


訓練次數3560，總回報731.6148148148015


 71%|████████████████████████▉          | 3570/5000 [9:42:16<3:55:16,  9.87s/it]


訓練次數3570，總回報130.77391304347861


 72%|█████████████████████████          | 3580/5000 [9:43:47<3:56:17,  9.98s/it]


訓練次數3580，總回報74.93722397476344


 72%|█████████████████████████▏         | 3590/5000 [9:45:43<5:50:33, 14.92s/it]


訓練次數3590，總回報877.178700361006


 72%|█████████████████████████▏         | 3600/5000 [9:47:22<5:23:17, 13.86s/it]


訓練次數3600，總回報544.2511705685582


 72%|█████████████████████████▎         | 3610/5000 [9:49:28<5:36:23, 14.52s/it]


訓練次數3610，總回報309.79325153374214


 72%|█████████████████████████▎         | 3620/5000 [9:51:22<6:11:11, 16.14s/it]


訓練次數3620，總回報353.67499999999916


 73%|█████████████████████████▍         | 3630/5000 [9:53:29<5:33:17, 14.60s/it]


訓練次數3630，總回報918.4568627450844


 73%|█████████████████████████▍         | 3640/5000 [9:55:22<5:33:37, 14.72s/it]


訓練次數3640，總回報582.128571428564


 73%|█████████████████████████▌         | 3650/5000 [9:57:05<4:40:08, 12.45s/it]


訓練次數3650，總回報99.81851851851874


 73%|█████████████████████████▌         | 3660/5000 [9:59:08<6:59:24, 18.78s/it]


訓練次數3660，總回報857.8301886792266


 73%|████████████████████████▉         | 3670/5000 [10:01:11<5:43:01, 15.47s/it]


訓練次數3670，總回報746.7476510067022


 74%|█████████████████████████         | 3680/5000 [10:02:51<3:35:35,  9.80s/it]


訓練次數3680，總回報426.4299401197554


 74%|█████████████████████████         | 3690/5000 [10:04:55<6:31:36, 17.94s/it]


訓練次數3690，總回報844.1548494983107


 74%|█████████████████████████▏        | 3700/5000 [10:07:06<7:04:43, 19.60s/it]


訓練次數3700，總回報900.0432432432247


 74%|█████████████████████████▏        | 3710/5000 [10:09:18<5:25:30, 15.14s/it]


訓練次數3710，總回報43.38571428571422


 74%|█████████████████████████▎        | 3720/5000 [10:11:14<4:31:07, 12.71s/it]


訓練次數3720，總回報445.6032028469731


 75%|█████████████████████████▎        | 3730/5000 [10:12:54<4:20:05, 12.29s/it]


訓練次數3730，總回報520.8291338582655


 75%|█████████████████████████▍        | 3740/5000 [10:14:33<3:45:41, 10.75s/it]


訓練次數3740，總回報147.46666666666704


 75%|█████████████████████████▌        | 3750/5000 [10:16:47<5:08:10, 14.79s/it]


訓練次數3750，總回報445.1819672131119


 75%|█████████████████████████▌        | 3760/5000 [10:18:51<4:32:43, 13.20s/it]


訓練次數3760，總回報436.8129476583999


 75%|█████████████████████████▋        | 3770/5000 [10:20:52<5:20:37, 15.64s/it]


訓練次數3770，總回報884.7971731448657


 76%|█████████████████████████▋        | 3780/5000 [10:22:52<5:10:53, 15.29s/it]


訓練次數3780，總回報274.089542483661


 76%|█████████████████████████▊        | 3790/5000 [10:24:51<4:19:16, 12.86s/it]


訓練次數3790，總回報572.2186440677918


 76%|█████████████████████████▊        | 3800/5000 [10:26:30<4:21:07, 13.06s/it]


訓練次數3800，總回報915.6007299269941


 76%|█████████████████████████▉        | 3810/5000 [10:28:08<3:41:43, 11.18s/it]


訓練次數3810，總回報413.3985507246367


 76%|█████████████████████████▉        | 3820/5000 [10:30:01<4:06:53, 12.55s/it]


訓練次數3820，總回報434.10865051903016


 77%|██████████████████████████        | 3830/5000 [10:32:07<5:20:29, 16.44s/it]


訓練次數3830，總回報883.4518518518423


 77%|██████████████████████████        | 3840/5000 [10:33:29<3:36:13, 11.18s/it]


訓練次數3840，總回報194.25426356589205


 77%|██████████████████████████▏       | 3850/5000 [10:35:05<3:49:06, 11.95s/it]


訓練次數3850，總回報115.52818791946338


 77%|██████████████████████████▏       | 3860/5000 [10:37:14<5:11:34, 16.40s/it]


訓練次數3860，總回報820.624999999987


 77%|██████████████████████████▎       | 3870/5000 [10:39:26<5:08:27, 16.38s/it]


訓練次數3870，總回報-94.99999999999898


 78%|██████████████████████████▍       | 3880/5000 [10:41:25<4:32:04, 14.58s/it]


訓練次數3880，總回報718.9205479451944


 78%|██████████████████████████▍       | 3890/5000 [10:43:50<4:43:42, 15.34s/it]


訓練次數3890，總回報907.3925925925789


 78%|██████████████████████████▌       | 3900/5000 [10:45:59<4:25:14, 14.47s/it]


訓練次數3900，總回報430.86759581881313


 78%|██████████████████████████▌       | 3910/5000 [10:47:51<3:56:27, 13.02s/it]


訓練次數3910，總回報876.0538461538354


 78%|██████████████████████████▋       | 3920/5000 [10:49:28<2:45:55,  9.22s/it]


訓練次數3920，總回報120.30000000000022


 79%|██████████████████████████▋       | 3930/5000 [10:51:56<5:32:06, 18.62s/it]


訓練次數3930，總回報877.0179487179425


 79%|██████████████████████████▊       | 3940/5000 [10:53:45<4:08:07, 14.04s/it]


訓練次數3940，總回報288.0731343283586


 79%|██████████████████████████▊       | 3950/5000 [10:56:12<4:55:06, 16.86s/it]


訓練次數3950，總回報779.1401360544174


 79%|██████████████████████████▉       | 3960/5000 [10:58:06<4:32:53, 15.74s/it]


訓練次數3960，總回報816.5017421602703


 79%|██████████████████████████▉       | 3970/5000 [11:00:15<4:37:20, 16.16s/it]


訓練次數3970，總回報863.990536277591


 80%|███████████████████████████       | 3980/5000 [11:01:49<2:51:01, 10.06s/it]


訓練次數3980，總回報558.7386100386042


 80%|███████████████████████████▏      | 3990/5000 [11:03:36<3:10:50, 11.34s/it]


訓練次數3990，總回報879.9957095709516


 80%|███████████████████████████▏      | 4000/5000 [11:05:31<3:45:26, 13.53s/it]


訓練次數4000，總回報452.4965779467665


 80%|███████████████████████████▎      | 4010/5000 [11:07:25<3:43:30, 13.55s/it]


訓練次數4010，總回報914.4874999999904


 80%|███████████████████████████▎      | 4020/5000 [11:08:59<3:23:02, 12.43s/it]


訓練次數4020，總回報319.83220338983017


 81%|███████████████████████████▍      | 4030/5000 [11:10:54<2:51:25, 10.60s/it]


訓練次數4030，總回報357.39629629629496


 81%|███████████████████████████▍      | 4040/5000 [11:12:40<2:14:31,  8.41s/it]


訓練次數4040，總回報60.46494845360817


 81%|███████████████████████████▌      | 4050/5000 [11:14:14<2:41:38, 10.21s/it]


訓練次數4050，總回報48.79999999999995


 81%|███████████████████████████▌      | 4060/5000 [11:16:30<4:31:29, 17.33s/it]


訓練次數4060，總回報881.0047619047479


 81%|███████████████████████████▋      | 4070/5000 [11:17:59<2:54:18, 11.25s/it]


訓練次數4070，總回報293.6022346368721


 82%|███████████████████████████▋      | 4080/5000 [11:19:37<2:38:09, 10.31s/it]


訓練次數4080，總回報430.32727272726993


 82%|███████████████████████████▊      | 4090/5000 [11:21:27<3:01:12, 11.95s/it]


訓練次數4090，總回報123.66619718309882


 82%|███████████████████████████▉      | 4100/5000 [11:23:31<3:22:12, 13.48s/it]


訓練次數4100，總回報316.6668769716082


 82%|███████████████████████████▉      | 4110/5000 [11:25:36<3:30:07, 14.17s/it]


訓練次數4110，總回報916.8076923076798


 82%|████████████████████████████      | 4120/5000 [11:27:24<3:13:44, 13.21s/it]


訓練次數4120，總回報362.7079754601217


 83%|████████████████████████████      | 4130/5000 [11:28:59<2:37:27, 10.86s/it]


訓練次數4130，總回報392.0057324840735


 83%|████████████████████████████▏     | 4140/5000 [11:31:14<3:37:04, 15.14s/it]


訓練次數4140，總回報443.20649350649126


 83%|████████████████████████████▏     | 4150/5000 [11:33:01<3:24:31, 14.44s/it]


訓練次數4150，總回報94.8011070110703


 83%|████████████████████████████▎     | 4160/5000 [11:35:00<3:19:31, 14.25s/it]


訓練次數4160，總回報340.2031446540868


 83%|████████████████████████████▎     | 4170/5000 [11:36:54<3:41:50, 16.04s/it]


訓練次數4170，總回報850.8399267399193


 84%|████████████████████████████▍     | 4180/5000 [11:38:15<2:15:57,  9.95s/it]


訓練次數4180，總回報383.1769230769222


 84%|████████████████████████████▍     | 4190/5000 [11:40:01<3:09:20, 14.02s/it]


訓練次數4190，總回報909.3993399339867


 84%|████████████████████████████▌     | 4200/5000 [11:42:07<3:18:40, 14.90s/it]


訓練次數4200，總回報576.5229508196658


 84%|████████████████████████████▋     | 4210/5000 [11:43:48<2:44:02, 12.46s/it]


訓練次數4210，總回報406.03164556961815


 84%|████████████████████████████▋     | 4220/5000 [11:45:37<2:41:17, 12.41s/it]


訓練次數4220，總回報46.775757575757524


 85%|████████████████████████████▊     | 4230/5000 [11:47:23<3:01:16, 14.13s/it]


訓練次數4230，總回報349.38630136986245


 85%|████████████████████████████▊     | 4240/5000 [11:49:21<2:44:31, 12.99s/it]


訓練次數4240，總回報913.6824561403374


 85%|████████████████████████████▉     | 4250/5000 [11:51:24<3:05:44, 14.86s/it]


訓練次數4250，總回報528.0692307692277


 85%|████████████████████████████▉     | 4260/5000 [11:53:14<2:33:04, 12.41s/it]


訓練次數4260，總回報531.1274368231008


 85%|█████████████████████████████     | 4270/5000 [11:55:04<2:23:09, 11.77s/it]


訓練次數4270，總回報60.86435986159163


 86%|█████████████████████████████     | 4280/5000 [11:57:06<2:38:47, 13.23s/it]


訓練次數4280，總回報557.2776061776007


 86%|█████████████████████████████▏    | 4290/5000 [11:59:33<2:57:29, 15.00s/it]


訓練次數4290，總回報240.82317880794776


 86%|█████████████████████████████▏    | 4300/5000 [12:01:40<3:24:45, 17.55s/it]


訓練次數4300，總回報657.1130584192373


 86%|█████████████████████████████▎    | 4310/5000 [12:03:09<1:58:34, 10.31s/it]


訓練次數4310，總回報197.30000000000075


 86%|█████████████████████████████▍    | 4320/5000 [12:04:54<2:05:16, 11.05s/it]


訓練次數4320，總回報515.6328358208908


 87%|█████████████████████████████▍    | 4330/5000 [12:06:26<2:31:56, 13.61s/it]


訓練次數4330，總回報884.7883720930198


 87%|█████████████████████████████▌    | 4340/5000 [12:08:04<2:26:21, 13.31s/it]


訓練次數4340，總回報528.8588235294077


 87%|█████████████████████████████▌    | 4350/5000 [12:10:19<2:44:48, 15.21s/it]


訓練次數4350，總回報586.9999999999949


 87%|█████████████████████████████▋    | 4360/5000 [12:12:25<2:52:08, 16.14s/it]


訓練次數4360，總回報557.64013377926


 87%|█████████████████████████████▋    | 4370/5000 [12:14:13<1:49:29, 10.43s/it]


訓練次數4370，總回報203.5580645161295


 88%|█████████████████████████████▊    | 4380/5000 [12:16:04<2:05:08, 12.11s/it]


訓練次數4380，總回報343.25209003215275


 88%|█████████████████████████████▊    | 4390/5000 [12:18:02<2:31:36, 14.91s/it]


訓練次數4390，總回報643.0888888888825


 88%|█████████████████████████████▉    | 4400/5000 [12:20:06<2:33:31, 15.35s/it]


訓練次數4400，總回報898.5769230769109


 88%|█████████████████████████████▉    | 4410/5000 [12:21:40<1:47:00, 10.88s/it]


訓練次數4410，總回報224.8246575342471


 88%|██████████████████████████████    | 4420/5000 [12:23:42<2:29:26, 15.46s/it]


訓練次數4420，總回報400.4939393939385


 89%|██████████████████████████████    | 4430/5000 [12:25:34<1:58:28, 12.47s/it]


訓練次數4430，總回報539.19124087591


 89%|██████████████████████████████▏   | 4440/5000 [12:27:23<1:55:22, 12.36s/it]


訓練次數4440，總回報132.79876160990753


 89%|██████████████████████████████▎   | 4450/5000 [12:29:23<2:19:49, 15.25s/it]


訓練次數4450，總回報201.63333333333387


 89%|██████████████████████████████▎   | 4460/5000 [12:31:03<1:38:48, 10.98s/it]


訓練次數4460，總回報107.97986348122892


 89%|██████████████████████████████▍   | 4470/5000 [12:32:36<2:01:20, 13.74s/it]


訓練次數4470，總回報899.7824561403341


 90%|██████████████████████████████▍   | 4480/5000 [12:34:29<1:42:01, 11.77s/it]


訓練次數4480，總回報762.0786941580661


 90%|██████████████████████████████▌   | 4490/5000 [12:35:54<1:46:13, 12.50s/it]


訓練次數4490，總回報417.1966101694891


 90%|██████████████████████████████▌   | 4500/5000 [12:37:34<1:06:24,  7.97s/it]


訓練次數4500，總回報108.54802867383532


 90%|██████████████████████████████▋   | 4510/5000 [12:39:45<2:06:29, 15.49s/it]


訓練次數4510，總回報490.50247349823195


 90%|██████████████████████████████▋   | 4520/5000 [12:41:08<1:18:09,  9.77s/it]


訓練次數4520，總回報155.3123456790129


 91%|██████████████████████████████▊   | 4530/5000 [12:42:55<1:31:33, 11.69s/it]


訓練次數4530，總回報139.00955631399367


 91%|██████████████████████████████▊   | 4540/5000 [12:44:41<1:51:46, 14.58s/it]


訓練次數4540，總回報854.6937888198619


 91%|██████████████████████████████▉   | 4550/5000 [12:47:00<1:49:11, 14.56s/it]


訓練次數4550，總回報502.888552188548


 91%|███████████████████████████████   | 4560/5000 [12:49:10<1:38:57, 13.50s/it]


訓練次數4560，總回報194.5492537313438


 91%|███████████████████████████████   | 4570/5000 [12:50:42<1:37:09, 13.56s/it]


訓練次數4570，總回報359.74864864864827


 92%|███████████████████████████████▏  | 4580/5000 [12:52:23<1:22:48, 11.83s/it]


訓練次數4580，總回報147.5352159468443


 92%|███████████████████████████████▏  | 4590/5000 [12:53:56<1:27:26, 12.80s/it]


訓練次數4590，總回報497.2275862068922


 92%|███████████████████████████████▎  | 4600/5000 [12:55:40<1:24:20, 12.65s/it]


訓練次數4600，總回報916.8078014184318


 92%|███████████████████████████████▎  | 4610/5000 [12:58:00<1:53:08, 17.41s/it]


訓練次數4610，總回報494.9312703583031


 92%|███████████████████████████████▍  | 4620/5000 [12:59:39<1:12:41, 11.48s/it]


訓練次數4620，總回報737.6898305084671


 93%|█████████████████████████████████▎  | 4630/5000 [13:01:08<58:06,  9.42s/it]


訓練次數4630，總回報134.93465703971148


 93%|███████████████████████████████▌  | 4640/5000 [13:02:59<1:29:58, 15.00s/it]


訓練次數4640，總回報558.5923076923046


 93%|███████████████████████████████▌  | 4650/5000 [13:04:49<1:18:50, 13.52s/it]


訓練次數4650，總回報455.4835016834983


 93%|███████████████████████████████▋  | 4660/5000 [13:06:58<1:36:26, 17.02s/it]


訓練次數4660，總回報611.2676567656719


 93%|███████████████████████████████▊  | 4670/5000 [13:08:45<1:10:45, 12.87s/it]


訓練次數4670，總回報580.6708978328141


 94%|███████████████████████████████▊  | 4680/5000 [13:10:39<1:11:15, 13.36s/it]


訓練次數4680，總回報380.89729729729646


 94%|█████████████████████████████████▊  | 4690/5000 [13:12:07<52:04, 10.08s/it]


訓練次數4690，總回報112.52272727272762


 94%|███████████████████████████████▉  | 4700/5000 [13:14:08<1:07:39, 13.53s/it]


訓練次數4700，總回報687.3752475247481


 94%|█████████████████████████████████▉  | 4710/5000 [13:15:46<58:57, 12.20s/it]


訓練次數4710，總回報53.517391304347754


 94%|████████████████████████████████  | 4720/5000 [13:17:59<1:18:46, 16.88s/it]


訓練次數4720，總回報423.6736842105238


 95%|██████████████████████████████████  | 4730/5000 [13:19:21<46:19, 10.29s/it]


訓練次數4730，總回報342.7777777777768


 95%|████████████████████████████████▏ | 4740/5000 [13:21:18<1:12:15, 16.67s/it]


訓練次數4740，總回報723.3496732026039


 95%|████████████████████████████████▎ | 4750/5000 [13:23:23<1:19:26, 19.07s/it]


訓練次數4750，總回報719.9466192170689


 95%|██████████████████████████████████▎ | 4760/5000 [13:25:19<58:04, 14.52s/it]


訓練次數4760，總回報655.2024911031978


 95%|████████████████████████████████▍ | 4770/5000 [13:27:27<1:08:31, 17.87s/it]


訓練次數4770，總回報-94.99999999999896


 96%|██████████████████████████████████▍ | 4780/5000 [13:29:09<44:22, 12.10s/it]


訓練次數4780，總回報923.3779922779796


 96%|██████████████████████████████████▍ | 4790/5000 [13:31:11<39:43, 11.35s/it]


訓練次數4790，總回報55.589632107023334


 96%|██████████████████████████████████▌ | 4800/5000 [13:33:09<47:43, 14.32s/it]


訓練次數4800，總回報909.8885906040124


 96%|████████████████████████████████▋ | 4810/5000 [13:35:38<1:01:04, 19.28s/it]


訓練次數4810，總回報905.6078014184293


 96%|██████████████████████████████████▋ | 4820/5000 [13:37:37<41:17, 13.76s/it]


訓練次數4820，總回報54.77222222222213


 97%|██████████████████████████████████▊ | 4830/5000 [13:39:40<40:23, 14.26s/it]


訓練次數4830，總回報132.33529411764732


 97%|██████████████████████████████████▊ | 4840/5000 [13:41:59<37:49, 14.18s/it]


訓練次數4840，總回報874.2662207357697


 97%|██████████████████████████████████▉ | 4850/5000 [13:43:49<27:12, 10.88s/it]


訓練次數4850，總回報172.61100323624657


 97%|██████████████████████████████████▉ | 4860/5000 [13:45:49<34:43, 14.89s/it]


訓練次數4860，總回報910.727147766308


 97%|███████████████████████████████████ | 4870/5000 [13:47:51<34:41, 16.01s/it]


訓練次數4870，總回報721.0496732026036


 98%|███████████████████████████████████▏| 4880/5000 [13:49:17<19:39,  9.83s/it]


訓練次數4880，總回報133.240836012862


 98%|███████████████████████████████████▏| 4890/5000 [13:51:37<35:34, 19.41s/it]


訓練次數4890，總回報901.6888198757612


 98%|███████████████████████████████████▎| 4900/5000 [13:53:26<23:17, 13.97s/it]


訓練次數4900，總回報289.81471571906417


 98%|███████████████████████████████████▎| 4910/5000 [13:55:23<19:52, 13.25s/it]


訓練次數4910，總回報453.2999999999984


 98%|███████████████████████████████████▍| 4920/5000 [13:57:16<17:32, 13.15s/it]


訓練次數4920，總回報850.2072992700599


 99%|███████████████████████████████████▍| 4930/5000 [13:59:00<16:09, 13.84s/it]


訓練次數4930，總回報873.8278195488597


 99%|███████████████████████████████████▌| 4940/5000 [14:01:11<11:52, 11.88s/it]


訓練次數4940，總回報173.3295373665487


 99%|███████████████████████████████████▋| 4950/5000 [14:03:10<10:51, 13.03s/it]


訓練次數4950，總回報291.3955307262571


 99%|███████████████████████████████████▋| 4960/5000 [14:05:01<07:50, 11.76s/it]


訓練次數4960，總回報57.000358422938994


 99%|███████████████████████████████████▊| 4970/5000 [14:06:08<04:13,  8.47s/it]


訓練次數4970，總回報80.90000000000008


100%|███████████████████████████████████▊| 4980/5000 [14:07:38<03:40, 11.00s/it]


訓練次數4980，總回報364.4444015444002


100%|███████████████████████████████████▉| 4990/5000 [14:09:33<02:08, 12.83s/it]


訓練次數4990，總回報421.3829431438116


100%|████████████████████████████████████| 5000/5000 [14:11:35<00:00, 10.22s/it]


訓練次數5000，總回報455.2746312684352





In [None]:
# Invoke Record() on the Agent object once the training loop logged above has finished.
# NOTE(review): Record() is defined outside this cell — presumably it runs one
# evaluation episode and reports its total return (the number captured as this
# cell's output); confirm against the method's definition.
Agent.Record()

388.3432343234315
