# import 必要套件

In [None]:
import random
import time,math
import numpy as np
import gymnasium as gym
import gymnasium.wrappers as gym_wrap
import matplotlib.pyplot as plt
import matplotlib.animation as animation #輸出動畫影片
from IPython import display
from tqdm import tqdm
import os

In [None]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
import collections
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
class ImageEnv(gym.Wrapper):
  """Frame-skipping, frame-stacking wrapper around an image environment.

  Every frame is cropped to rows 0..71 and columns 12..83 and mapped with
  ``x / 255.0 - 0.5``. The observation handed to the agent is the last
  ``stack_frames`` processed frames stacked on a new leading axis.

  NOTE(review): assumes the wrapped env yields 2-D (grayscale) frames of at
  least 72x84 pixels with values in 0..255 - confirm against the wrapped env.

  Args:
    env: environment to wrap.
    stack_frames: number of frames kept in the stack; also the number of
      times each agent action is repeated in ``step``.
    delay_op: number of steps taken with action 0 right after a reset before
      the first observation is returned (presumably to skip the intro frames
      where nothing useful is visible - TODO confirm).
  """

  def __init__(self,env,stack_frames=4,delay_op=50):
    super().__init__(env)
    self.delay_op = delay_op
    self.stack_frames = stack_frames

  @staticmethod
  def _preprocess(frame):
    """Crop a raw frame to 72x72 and shift/scale it with ``x/255.0-0.5``."""
    return frame[:72, 12:84]/255.0-0.5

  def reset(self, *, seed=None, options=None):
    """Reset the wrapped env, skip ``delay_op`` steps and build the stack.

    Args:
      seed: forwarded to the wrapped env's ``reset``.
      options: forwarded to the wrapped env's ``reset``.

    Returns:
      ``(stacked_state, info)`` where ``stacked_state`` is the first usable
      frame repeated ``stack_frames`` times, shape
      ``[stack_frames, 72, 72]``.
    """
    s, info = self.env.reset(seed=seed, options=options)
    for _ in range(self.delay_op):
      s, _, _, _, info = self.env.step(0)
    # Crop and tile once, after the warm-up steps, so the stack is also
    # defined when delay_op == 0.
    s = self._preprocess(s)
    self.stacked_state = np.tile( s , (self.stack_frames,1,1) )  # [stack_frames, 72, 72]
    return self.stacked_state, info

  def step(self, action):
    """Repeat ``action`` for up to ``stack_frames`` env steps.

    Rewards of the repeated steps are summed. A single-step reward of exactly
    -100 is treated as termination. The loop stops early as soon as the
    episode is terminated or truncated.

    Returns:
      ``(stacked_state, reward, terminated, truncated, info)`` with the flags
      and ``info`` taken from the last env step that was executed.
    """
    reward = 0
    for _ in range(self.stack_frames):
      s, r, terminated, truncated, info = self.env.step(action)
      if r==-100:
        terminated=True
      reward += r
      # Push the new frame before checking for the end of the episode so the
      # returned observation always contains the most recent frame.
      s = self._preprocess(s)
      self.stacked_state = np.concatenate((self.stacked_state[1:], s[np.newaxis]), axis=0)
      if terminated or truncated:
        break
    return self.stacked_state, reward, terminated, truncated, info

# 建立Replay Buffer類別

In [None]:
# Experience replay buffer (ReplayBuffer)
class ReplayBuffer:
  """Fixed-capacity ring buffer of ``(s, a, r, s_, done)`` transitions.

  Storage is pre-allocated as NumPy arrays; once ``max_size`` transitions
  have been written, the oldest entries are overwritten.

  Args:
    max_size: maximum number of transitions kept.
    num_steps: stored as ``self.num_steps``; not used by the buffer itself.
    state_shape: shape of a single state. Defaults to ``(4, 72, 72)``.
  """

  def __init__(self,max_size=int(1e5), num_steps=1, state_shape=(4,72,72)):
    self.state_shape = tuple(state_shape)
    self.s = np.zeros((max_size,)+self.state_shape, dtype=np.float32)
    self.a = np.zeros((max_size,), dtype=np.int64)
    self.r = np.zeros((max_size, 1), dtype=np.float32)
    self.s_ = np.zeros((max_size,)+self.state_shape, dtype=np.float32)
    self.done = np.zeros((max_size, 1), dtype=np.float32)
    self.ptr = 0    # index of the slot the next append() writes to
    self.size = 0   # number of valid transitions currently stored
    self.max_size = max_size
    self.num_steps = num_steps

  def append(self,s,a,r,s_,done):
    """Store one transition, overwriting the oldest one when full."""
    self.s[self.ptr] = s
    self.a[self.ptr] = a
    self.r[self.ptr] = r
    self.s_[self.ptr] = s_
    self.done[self.ptr] = done
    self.ptr = (self.ptr + 1) % self.max_size
    self.size = min(self.size+1,self.max_size)

  def sample(self, batch_size):
    """Draw ``batch_size`` stored transitions uniformly, with replacement.

    Returns:
      Tuple ``(s, a, r, s_, done)`` of tensors: float states of shape
      ``[batch_size, *state_shape]``, int64 actions of shape ``[batch_size]``
      and float rewards / done flags of shape ``[batch_size, 1]``.

    Raises:
      ValueError: if nothing has been stored yet.
    """
    if self.size == 0:
      raise ValueError("cannot sample from an empty ReplayBuffer")
    ind = np.random.randint(0, self.size, batch_size)
    return (
      torch.FloatTensor(self.s[ind]),
      torch.LongTensor(self.a[ind]),
      torch.FloatTensor(self.r[ind]),
      torch.FloatTensor(self.s_[ind]),
      torch.FloatTensor(self.done[ind]),
    )

# 搭建DQN神經網路的類別

In [None]:
# Network of the original (vanilla) DQN
class DQN(torch.nn.Module):
  """Two-convolution Q-network mapping a state stack to one value per action.

  Layer shapes for a ``[N, 4, 72, 72]`` input:
    conv1: [N, 4, 72, 72]  -> [N, 16, 18, 18]
    conv2: [N, 16, 18, 18] -> [N, 32, 8, 8]
    fc1:   2048 -> 256
    fc2:   256  -> n_act

  Args:
    n_act: number of discrete actions (size of the output layer).
  """

  def __init__(self,n_act):
    super().__init__()
    layers = torch.nn
    self.conv1 = layers.Conv2d(4, 16, kernel_size=4, stride=4)
    self.conv2 = layers.Conv2d(16, 32, kernel_size=4, stride=2)
    self.fc1 = layers.Linear(32 * 8 * 8, 256)
    self.fc2 = layers.Linear(256, n_act)

  def forward(self,x):
    """Return Q-values of shape ``[N, n_act]`` for the given state(s)."""
    feature_maps = F.relu(self.conv2(F.relu(self.conv1(x))))
    # Flatten everything but the batch axis; -1 also yields a batch of one
    # when the input carried no batch axis.
    flat = feature_maps.view((-1, 32 * 8 * 8))
    # NOTE(review): there is no activation between fc1 and fc2, so the two
    # linear layers compose into a single linear map - confirm this is intended.
    return self.fc2(self.fc1(flat))

# 設定是否載入模型參數，舊參數檔路徑，新參數檔路徑

In [None]:
# Episode number of the checkpoint to resume from; 0 starts a fresh run.
Load_File = 0
# Weights file that belongs to that checkpoint (only read when Load_File > 0).
Old_File = f"Model-{Load_File}.pt"
if Load_File <= 0:
  # Fresh run: start with empty training / evaluation / loss histories.
  Log = {"TrainReward":[],"TestReward":[],"Loss":[]}
else:
  # The log was saved as a pickled dict inside a 0-d object array, hence
  # allow_pickle=True and .item(). Only load log files you created yourself.
  Log = np.load(f"Log-{Load_File}.npy", allow_pickle=True).item()

In [None]:
# Discrete-action CarRacing rendered to RGB arrays, converted to grayscale and
# then wrapped by ImageEnv (frame skipping / stacking).
env = ImageEnv(
  gym_wrap.GrayscaleObservation(
    gym.make('CarRacing-v3',render_mode="rgb_array",domain_randomize=False, continuous=False)
  )
)

# 搭建智能體Agent的類別

In [None]:
class DQNAgent():
  """DQN agent with a target network and experience replay.

  The agent acts on the module-level ``env`` and, when ``Load_File > 0``,
  restores its weights from ``Old_File``. Training statistics are appended
  to the module-level ``Log`` dict.

  Args:
    gamma: discount factor used in the TD target.
    eps_low: lower bound of the epsilon-greedy exploration rate.
    lr: learning rate of the Adam optimizer.
  """

  def __init__(self,gamma=0.9,eps_low=0.1,lr=0.00025):
    self.env = env
    self.n_act=self.env.action_space.n
    self.PredictDQN= DQN(self.n_act)
    self.TargetDQN= DQN(self.n_act)
    if Load_File>0:
      # map_location lets a checkpoint written on a GPU be restored on a
      # CPU-only machine (and vice versa).
      self.PredictDQN.load_state_dict(torch.load(Old_File, map_location=device))
    # The target network always starts as an exact copy of the online network,
    # whether the weights are fresh or restored.
    self.TargetDQN.load_state_dict(self.PredictDQN.state_dict())
    self.PredictDQN.to(device)
    self.TargetDQN.to(device)
    self.LossFun=torch.nn.SmoothL1Loss()
    self.optimizer=torch.optim.Adam(self.PredictDQN.parameters(),lr=lr)
    self.gamma=gamma
    self.eps_low=eps_low
    # Exploration rate; Train() overwrites it at the start of every episode.
    # Defined here so SelectA() also works before Train() has been called.
    self.EPS=1.0
    self.rb=ReplayBuffer(max_size=10000, num_steps=1)

  def PredictA(self,s):
    """Return the greedy action (index of the largest Q-value) for state ``s``."""
    with torch.no_grad():
      return torch.argmax(self.PredictDQN(torch.FloatTensor(s).to(device))).item()

  def SelectA(self,a):
    """Epsilon-greedy choice: a random action with probability ``self.EPS``, else ``a``."""
    return self.env.action_space.sample() if np.random.random()<self.EPS else a

  def Train(self,N_EPISODES):
    """Run training episodes ``Load_File .. N_EPISODES-1``.

    Every 10th episode a greedy evaluation episode is played and the weights
    and the log are written to ``Model-<episode>.pt`` / ``Log-<episode>.npy``.
    """
    for i in tqdm(range(Load_File,N_EPISODES)):
      # Exponentially decay epsilon from 1 towards eps_low over the run.
      self.EPS=self.eps_low+(1-self.eps_low)*math.exp(-i*12/(N_EPISODES))
      # Sync the target network once per 20 episodes. Doing this inside the
      # step loop would copy the weights on every step of that episode.
      if i % 20==0:
        self.TargetDQN.load_state_dict(self.PredictDQN.state_dict())
      total_reward=0
      s,_=self.env.reset()
      while True:
        a=self.SelectA(self.PredictA(s))
        s_,r,done,stop,_=self.env.step(a)
        # Only `done` (termination) is stored; a truncated episode (`stop`)
        # still bootstraps from the next state in Learn().
        self.rb.append(s,a,r,s_,done)
        # Start learning once some transitions have been collected.
        if self.rb.size > 200 and i%self.rb.num_steps==0:
          self.Learn()
        s=s_
        total_reward+=r
        if done or stop:
          break
      Log["TrainReward"].append(total_reward)
      if i % 10 == 9:
        test_reward=self.Test()
        print(f"\n訓練次數{i+1}，總回報{test_reward}")
        Log["TestReward"].append(test_reward)
        torch.save(self.PredictDQN.state_dict(), f"Model-{i+1}.pt")
        np.save(f"Log-{i+1}.npy", Log)

  def Learn(self):
    """Do one gradient step on a batch of 32 transitions from the replay buffer."""
    batch_s, batch_a, batch_r, batch_s_, batch_done=(t.to(device) for t in self.rb.sample(32))
    # Q(s, a) of the actions that were actually taken, shape [32, 1].
    predict_Q = self.PredictDQN(batch_s).gather(1, batch_a.unsqueeze(1))
    with torch.no_grad():
      # TD target from the target network; (1 - done) removes the bootstrap
      # term for terminal transitions.
      next_Q = self.TargetDQN(batch_s_).max(1,keepdim=True)[0]
      target_Q = batch_r+(1-batch_done)*self.gamma*next_Q
    loss = self.LossFun(predict_Q, target_Q)
    Log["Loss"].append(float(loss))
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

  def Test(self,VIDEO=False):
    """Play one greedy episode and return its total reward.

    Args:
      VIDEO: when true, every frame is rendered and the episode is saved as
        ``Car_Racing.mp4``. When false, no frames are rendered or stored.
    """
    total_reward=0
    video=[]
    s,_=self.env.reset()
    while True:
      if VIDEO:
        video.append(self.env.render())
      a=self.PredictA(s)
      s,r,done,stop,_=self.env.step(a)
      total_reward+=r
      if done or stop:
        break
    if VIDEO:
      patch = plt.imshow(video[0])  # image artist that shows the frames
      plt.axis('off')               # hide the axes
      def animate(i):
        # Swap in frame i.
        patch.set_data(video[i])
      # plt.gcf(): current figure; animate: per-frame callback;
      # frames: number of frames; interval: delay between frames in ms.
      anim = animation.FuncAnimation(plt.gcf(),animate,frames=len(video),interval=200)
      anim.save('Car_Racing.mp4')   # write the animation as an mp4 file
    return total_reward

  def Record(self):
    """Play one greedy episode, showing every frame and printing the rewards."""
    total_reward=0
    s,_=self.env.reset()
    while True:
      image=self.env.render()
      plt.imshow(image)
      a=self.PredictA(s)
      s,r,done,stop,_=self.env.step(a)
      print(r)
      total_reward+=r
      plt.pause(0.1)
      # Clear the currently displayed output before the next frame is drawn.
      display.clear_output(wait=True)
      if done or stop:
        break
    print(total_reward)

In [None]:
# Build the agent with this run's hyperparameters, then train for 5000 episodes.
Agent = DQNAgent(
  gamma=0.95,
  eps_low=0.05,
  lr=0.00025,
)
Agent.Train(N_EPISODES=5000)

  0%|                                      | 10/5000 [03:08<28:16:07, 20.39s/it]


訓練次數10，總回報-94.99999999999895


  0%|▏                                     | 20/5000 [05:51<23:00:55, 16.64s/it]


訓練次數20，總回報-94.99999999999898


  1%|▏                                     | 30/5000 [08:56<28:32:55, 20.68s/it]


訓練次數30，總回報-94.99999999999896


  1%|▎                                     | 40/5000 [11:52<18:51:54, 13.69s/it]


訓練次數40，總回報23.944816053511687


  1%|▍                                     | 50/5000 [14:29<25:43:44, 18.71s/it]


訓練次數50，總回報-94.99999999999892


  1%|▍                                     | 60/5000 [17:27<26:49:40, 19.55s/it]


訓練次數60，總回報-94.99999999999899


  1%|▌                                     | 70/5000 [19:48<26:19:12, 19.22s/it]


訓練次數70，總回報-94.99999999999895


  2%|▌                                     | 80/5000 [21:42<20:09:36, 14.75s/it]


訓練次數80，總回報-94.99999999999899


  2%|▋                                     | 90/5000 [23:22<13:46:02, 10.09s/it]


訓練次數90，總回報6.5884892086331135


  2%|▋                                    | 100/5000 [25:57<21:17:52, 15.65s/it]


訓練次數100，總回報3.084664536741216


  2%|▊                                    | 110/5000 [27:41<15:04:29, 11.10s/it]


訓練次數110，總回報13.597260273972626


  2%|▉                                    | 120/5000 [30:09<21:00:05, 15.49s/it]


訓練次數120，總回報-32.803278688524856


  3%|▉                                    | 130/5000 [32:23<21:19:35, 15.76s/it]


訓練次數130，總回報-1.2371647509578514


  3%|█                                    | 140/5000 [33:56<12:21:20,  9.15s/it]


訓練次數140，總回報9.94794520547948


  3%|█                                    | 150/5000 [35:36<17:32:46, 13.02s/it]


訓練次數150，總回報23.343205574912872


  3%|█▏                                   | 160/5000 [37:10<14:52:26, 11.06s/it]


訓練次數160，總回報6.6290322580645356


  3%|█▎                                   | 170/5000 [39:17<19:52:41, 14.82s/it]


訓練次數170，總回報29.034934497816536


  4%|█▎                                   | 180/5000 [40:38<12:58:26,  9.69s/it]


訓練次數180，總回報25.93548387096768


  4%|█▍                                   | 190/5000 [42:12<13:19:49,  9.98s/it]


訓練次數190，總回報16.945637583892612


  4%|█▌                                    | 200/5000 [43:17<8:23:30,  6.29s/it]


訓練次數200，總回報17.462162162162038


  4%|█▌                                   | 210/5000 [44:46<13:14:33,  9.95s/it]


訓練次數210，總回報32.996167247386694


  4%|█▋                                   | 220/5000 [46:07<10:14:02,  7.71s/it]


訓練次數220，總回報12.755252918287953


  5%|█▋                                   | 230/5000 [47:29<12:21:10,  9.32s/it]


訓練次數230，總回報20.33735408560309


  5%|█▊                                    | 240/5000 [48:47<9:40:13,  7.31s/it]


訓練次數240，總回報18.118644067796613


  5%|█▉                                    | 250/5000 [49:42<8:44:18,  6.62s/it]


訓練次數250，總回報23.534650455927032


  5%|█▉                                   | 260/5000 [51:01<14:58:54, 11.38s/it]


訓練次數260，總回報20.07697841726618


  5%|██                                    | 270/5000 [52:03<8:43:08,  6.64s/it]


訓練次數270，總回報12.36688963210704


  6%|██▏                                   | 280/5000 [53:08<8:10:32,  6.24s/it]


訓練次數280，總回報45.840963855421585


  6%|██▏                                   | 290/5000 [54:10<8:38:11,  6.60s/it]


訓練次數290，總回報56.27407407407394


  6%|██▏                                  | 300/5000 [55:34<10:42:29,  8.20s/it]


訓練次數300，總回報10.921602787456457


  6%|██▎                                  | 310/5000 [57:18<16:52:33, 12.95s/it]


訓練次數310，總回報29.166666666666615


  6%|██▎                                  | 320/5000 [58:49<17:42:11, 13.62s/it]


訓練次數320，總回報-94.99999999999895


  7%|██▍                                 | 330/5000 [1:00:01<9:26:23,  7.28s/it]


訓練次數330，總回報33.17499999999993


  7%|██▍                                | 340/5000 [1:01:51<14:22:09, 11.10s/it]


訓練次數340，總回報57.301374570446605


  7%|██▌                                 | 350/5000 [1:03:27<9:50:38,  7.62s/it]


訓練次數350，總回報40.34587155963294


  7%|██▌                                | 360/5000 [1:04:57<10:25:09,  8.08s/it]


訓練次數360，總回報16.83987730061351


  7%|██▌                                | 370/5000 [1:06:22<11:37:24,  9.04s/it]


訓練次數370，總回報48.131899641576965


  8%|██▋                                 | 380/5000 [1:07:27<9:22:54,  7.31s/it]


訓練次數380，總回報101.83061224489818


  8%|██▋                                | 390/5000 [1:08:56<10:48:10,  8.44s/it]


訓練次數390，總回報26.284210526315746


  8%|██▊                                | 400/5000 [1:10:17<16:25:43, 12.86s/it]


訓練次數400，總回報39.52206405693944


  8%|██▉                                 | 410/5000 [1:11:27<7:07:52,  5.59s/it]


訓練次數410，總回報31.333779264214005


  8%|██▉                                | 420/5000 [1:13:00<10:13:53,  8.04s/it]


訓練次數420，總回報40.969863013698564


  9%|███                                | 430/5000 [1:14:38<14:04:12, 11.08s/it]


訓練次數430，總回報51.3737704918032


  9%|███▏                                | 440/5000 [1:15:55<8:09:55,  6.45s/it]


訓練次數440，總回報28.164705882352898


  9%|███▏                                | 450/5000 [1:17:00<8:14:04,  6.52s/it]


訓練次數450，總回報35.91162790697669


  9%|███▎                                | 460/5000 [1:18:03<8:45:07,  6.94s/it]


訓練次數460，總回報4.576156583629906


  9%|███▎                               | 470/5000 [1:19:35<11:14:07,  8.93s/it]


訓練次數470，總回報30.24590747330956


 10%|███▎                               | 480/5000 [1:21:10<10:48:46,  8.61s/it]


訓練次數480，總回報36.91904761904755


 10%|███▍                               | 490/5000 [1:23:24<20:10:29, 16.10s/it]


訓練次數490，總回報188.93355704698172


 10%|███▌                               | 500/5000 [1:25:58<19:12:16, 15.36s/it]


訓練次數500，總回報63.78108108108108


 10%|███▌                               | 510/5000 [1:29:05<25:14:07, 20.23s/it]


訓練次數510，總回報54.120664206641976


 10%|███▋                               | 520/5000 [1:32:46<27:55:18, 22.44s/it]


訓練次數520，總回報408.4013605442108


 11%|███▋                               | 530/5000 [1:34:40<15:12:30, 12.25s/it]


訓練次數530，總回報164.35780730897136


 11%|███▊                               | 540/5000 [1:36:45<20:41:31, 16.70s/it]


訓練次數540，總回報38.80495049504943


 11%|███▊                               | 550/5000 [1:38:06<11:23:31,  9.22s/it]


訓練次數550，總回報41.41847507331368


 11%|███▉                               | 560/5000 [1:39:53<12:58:32, 10.52s/it]


訓練次數560，總回報52.317241379310246


 11%|███▉                               | 570/5000 [1:41:49<12:08:54,  9.87s/it]


訓練次數570，總回報52.32491909385105


 12%|████                               | 580/5000 [1:44:35<23:28:48, 19.12s/it]


訓練次數580，總回報237.4812949640303


 12%|████▏                              | 590/5000 [1:47:41<27:51:31, 22.74s/it]


訓練次數590，總回報443.00962199312124


 12%|████▏                              | 600/5000 [1:49:52<14:39:03, 11.99s/it]


訓練次數600，總回報234.41428571428796


 12%|████▎                              | 610/5000 [1:52:31<21:21:00, 17.51s/it]


訓練次數610，總回報257.2876288659796


 12%|████▎                              | 620/5000 [1:54:25<19:23:26, 15.94s/it]


訓練次數620，總回報140.65878136200774


 13%|████▍                              | 630/5000 [1:57:03<23:38:37, 19.48s/it]


訓練次數630，總回報396.14195804195464


 13%|████▍                              | 640/5000 [1:59:38<16:21:42, 13.51s/it]


訓練次數640，總回報105.57894736842127


 13%|████▌                              | 650/5000 [2:02:33<23:19:40, 19.31s/it]


訓練次數650，總回報163.0970684039099


 13%|████▌                              | 660/5000 [2:04:53<19:53:13, 16.50s/it]


訓練次數660，總回報118.39249011857747


 13%|████▋                              | 670/5000 [2:07:00<14:07:40, 11.75s/it]


訓練次數670，總回報29.950406504065


 14%|████▊                              | 680/5000 [2:09:17<23:04:03, 19.22s/it]


訓練次數680，總回報356.05631399317247


 14%|████▊                              | 690/5000 [2:12:19<22:42:34, 18.97s/it]


訓練次數690，總回報644.1606060605965


 14%|████▉                              | 700/5000 [2:13:52<14:35:28, 12.22s/it]


訓練次數700，總回報326.3520900321531


 14%|████▉                              | 710/5000 [2:16:20<16:49:07, 14.11s/it]


訓練次數710，總回報123.26619718309887


 14%|█████                              | 720/5000 [2:19:08<25:12:56, 21.21s/it]


訓練次數720，總回報305.1389261744962


 15%|█████                              | 730/5000 [2:21:11<16:49:00, 14.18s/it]


訓練次數730，總回報323.9210526315778


 15%|█████▏                             | 740/5000 [2:22:26<11:12:28,  9.47s/it]


訓練次數740，總回報247.11538461538558


 15%|█████▎                             | 750/5000 [2:25:10<24:40:40, 20.90s/it]


訓練次數750，總回報735.2583025830156


 15%|█████▎                             | 760/5000 [2:28:04<24:38:33, 20.92s/it]


訓練次數760，總回報297.5343434343439


 15%|█████▍                             | 770/5000 [2:30:38<19:54:24, 16.94s/it]


訓練次數770，總回報98.2894736842107


 16%|█████▍                             | 780/5000 [2:33:01<12:48:47, 10.93s/it]


訓練次數780，總回報64.74736842105256


 16%|█████▌                             | 790/5000 [2:35:05<21:46:49, 18.62s/it]


訓練次數790，總回報219.6933753943237


 16%|█████▌                             | 800/5000 [2:36:55<16:11:41, 13.88s/it]


訓練次數800，總回報63.30270270270263


 16%|█████▋                             | 810/5000 [2:38:50<13:06:36, 11.26s/it]


訓練次數810，總回報46.67126436781602


 16%|█████▋                             | 820/5000 [2:41:25<17:38:14, 15.19s/it]


訓練次數820，總回報363.14223826714647


 17%|█████▊                             | 830/5000 [2:43:41<18:32:48, 16.01s/it]


訓練次數830，總回報225.34225352112807


 17%|█████▉                             | 840/5000 [2:45:26<13:46:50, 11.93s/it]


訓練次數840，總回報42.88078291814939


 17%|█████▉                             | 850/5000 [2:47:32<14:09:40, 12.28s/it]


訓練次數850，總回報301.17831715210326


 17%|██████                             | 860/5000 [2:49:27<16:24:20, 14.27s/it]


訓練次數860，總回報244.36419753086574


 17%|██████                             | 870/5000 [2:52:01<26:12:21, 22.84s/it]


訓練次數870，總回報551.1413533834512


 18%|██████▏                            | 880/5000 [2:54:49<22:42:12, 19.84s/it]


訓練次數880，總回報258.3606557377062


 18%|██████▏                            | 890/5000 [2:57:19<14:45:30, 12.93s/it]


訓練次數890，總回報315.8687500000001


 18%|██████▎                            | 900/5000 [2:59:38<18:46:29, 16.49s/it]


訓練次數900，總回報42.331578947368364


 18%|██████▎                            | 910/5000 [3:02:07<16:57:38, 14.93s/it]


訓練次數910，總回報545.6617529880426


 18%|██████▍                            | 920/5000 [3:04:18<17:04:49, 15.07s/it]


訓練次數920，總回報271.2802047781579


 19%|██████▌                            | 930/5000 [3:06:17<12:11:39, 10.79s/it]


訓練次數930，總回報229.9870129870143


 19%|██████▌                            | 940/5000 [3:08:33<17:58:35, 15.94s/it]


訓練次數940，總回報165.4064327485387


 19%|██████▋                            | 950/5000 [3:10:33<16:08:53, 14.35s/it]


訓練次數950，總回報191.92222222222316


 19%|██████▋                            | 960/5000 [3:12:52<19:42:34, 17.56s/it]


訓練次數960，總回報219.82508250825163


 19%|██████▊                            | 970/5000 [3:14:23<11:29:30, 10.27s/it]


訓練次數970，總回報68.73727598566306


 20%|██████▊                            | 980/5000 [3:16:01<13:01:43, 11.67s/it]


訓練次數980，總回報76.95175718849846


 20%|██████▉                            | 990/5000 [3:18:12<20:33:14, 18.45s/it]


訓練次數990，總回報567.32107279693


 20%|██████▊                           | 1000/5000 [3:21:06<22:34:28, 20.32s/it]


訓練次數1000，總回報879.3655870445207


 20%|██████▊                           | 1010/5000 [3:23:52<23:03:49, 20.81s/it]


訓練次數1010，總回報158.5428571428578


 20%|██████▉                           | 1020/5000 [3:25:59<19:34:44, 17.71s/it]


訓練次數1020，總回報203.06209150326924


 21%|███████                           | 1030/5000 [3:28:16<20:37:12, 18.70s/it]


訓練次數1030，總回報207.4952380952392


 21%|███████                           | 1040/5000 [3:30:40<21:19:41, 19.39s/it]


訓練次數1040，總回報544.215068493147


 21%|███████▏                          | 1050/5000 [3:33:00<20:42:02, 18.87s/it]


訓練次數1050，總回報217.95910652921037


 21%|███████▏                          | 1060/5000 [3:35:24<20:14:36, 18.50s/it]


訓練次數1060，總回報407.3454545454511


 21%|███████▎                          | 1070/5000 [3:37:58<14:25:31, 13.21s/it]


訓練次數1070，總回報323.7886075949363


 22%|███████▎                          | 1080/5000 [3:40:51<26:01:27, 23.90s/it]


訓練次數1080，總回報834.7352112675901


 22%|███████▍                          | 1090/5000 [3:43:01<15:05:52, 13.90s/it]


訓練次數1090，總回報88.57710437710446


 22%|███████▍                          | 1100/5000 [3:44:30<11:54:47, 11.00s/it]


訓練次數1100，總回報86.28303249097482


 22%|███████▌                          | 1110/5000 [3:46:19<15:05:44, 13.97s/it]


訓練次數1110，總回報268.42930402930426


 22%|███████▌                          | 1120/5000 [3:47:59<13:16:43, 12.32s/it]


訓練次數1120，總回報388.0662921348303


 23%|███████▋                          | 1130/5000 [3:49:29<10:43:12,  9.97s/it]


訓練次數1130，總回報383.77801418439606


 23%|███████▊                          | 1140/5000 [3:51:07<13:55:29, 12.99s/it]


訓練次數1140，總回報216.34437086092797


 23%|███████▊                          | 1150/5000 [3:53:30<16:06:43, 15.07s/it]


訓練次數1150，總回報80.83313609467454


 23%|███████▉                          | 1160/5000 [3:55:39<16:30:39, 15.48s/it]


訓練次數1160，總回報295.7132841328416


 23%|███████▉                          | 1170/5000 [3:57:56<14:06:00, 13.25s/it]


訓練次數1170，總回報238.82457337884074


 24%|████████                          | 1180/5000 [3:59:44<15:32:44, 14.65s/it]


訓練次數1180，總回報331.92473498233215


 24%|████████                          | 1190/5000 [4:02:26<19:01:32, 17.98s/it]


訓練次數1190，總回報828.889855072455


 24%|████████▏                         | 1200/5000 [4:04:43<14:01:03, 13.28s/it]


訓練次數1200，總回報119.24050632911415


 24%|████████▏                         | 1210/5000 [4:06:59<14:46:32, 14.04s/it]


訓練次數1210，總回報482.58823529411535


 24%|████████▎                         | 1220/5000 [4:09:29<15:32:47, 14.81s/it]


訓練次數1220，總回報280.381270903011


 25%|████████▎                         | 1230/5000 [4:11:37<15:19:12, 14.63s/it]


訓練次數1230，總回報385.95076452599267


 25%|████████▍                         | 1240/5000 [4:13:36<17:50:36, 17.08s/it]


訓練次數1240，總回報306.4508771929817


 25%|████████▌                         | 1250/5000 [4:16:03<17:48:30, 17.10s/it]


訓練次數1250，總回報343.67972508590987


 25%|████████▌                         | 1260/5000 [4:18:49<21:36:31, 20.80s/it]


訓練次數1260，總回報564.7630363036267


 25%|████████▋                         | 1270/5000 [4:21:01<17:41:51, 17.08s/it]


訓練次數1270，總回報584.535766423353


 26%|████████▋                         | 1280/5000 [4:23:20<18:35:50, 18.00s/it]


訓練次數1280，總回報383.8614035087695


 26%|████████▊                         | 1290/5000 [4:25:43<13:37:41, 13.22s/it]


訓練次數1290，總回報106.92768166089982


 26%|████████▊                         | 1300/5000 [4:27:42<14:49:00, 14.42s/it]


訓練次數1300，總回報223.7517133956398


 26%|████████▉                         | 1310/5000 [4:30:33<19:53:38, 19.41s/it]


訓練次數1310，總回報233.8058823529423


 26%|████████▉                         | 1320/5000 [4:33:13<13:28:33, 13.18s/it]


訓練次數1320，總回報111.52125984251991


 27%|█████████                         | 1330/5000 [4:35:29<20:50:29, 20.44s/it]


訓練次數1330，總回報844.2257485029792


 27%|█████████                         | 1340/5000 [4:37:44<14:48:42, 14.57s/it]


訓練次數1340，總回報257.44054054054163


 27%|█████████▏                        | 1350/5000 [4:40:36<21:10:13, 20.88s/it]


訓練次數1350，總回報901.8426229508082


 27%|█████████▏                        | 1360/5000 [4:43:09<17:15:53, 17.08s/it]


訓練次數1360，總回報271.56934306569434


 27%|█████████▎                        | 1370/5000 [4:45:58<22:38:35, 22.46s/it]


訓練次數1370，總回報238.22727272727414


 28%|█████████▍                        | 1380/5000 [4:49:15<20:26:38, 20.33s/it]


訓練次數1380，總回報832.1999999999866


 28%|█████████▍                        | 1390/5000 [4:51:41<16:17:07, 16.24s/it]


訓練次數1390，總回報108.71232876712357


 28%|█████████▌                        | 1400/5000 [4:53:55<11:44:57, 11.75s/it]


訓練次數1400，總回報46.04285714285709


 28%|█████████▌                        | 1410/5000 [4:56:41<16:00:13, 16.05s/it]


訓練次數1410，總回報593.9307189542412


 28%|█████████▋                        | 1420/5000 [4:59:13<11:59:24, 12.06s/it]


訓練次數1420，總回報394.32439024390055


 29%|█████████▋                        | 1430/5000 [5:02:11<18:50:15, 19.00s/it]


訓練次數1430，總回報870.1968858131435


 29%|█████████▊                        | 1440/5000 [5:05:11<18:19:59, 18.54s/it]


訓練次數1440，總回報496.8625899280532


 29%|█████████▊                        | 1450/5000 [5:07:43<20:51:15, 21.15s/it]


訓練次數1450，總回報751.5904942965676


 29%|█████████▉                        | 1460/5000 [5:10:19<13:26:32, 13.67s/it]


訓練次數1460，總回報343.73861386138566


 29%|█████████▉                        | 1470/5000 [5:12:58<16:39:13, 16.98s/it]


訓練次數1470，總回報684.9121212121145


 30%|██████████                        | 1480/5000 [5:15:18<16:18:36, 16.68s/it]


訓練次數1480，總回報273.8592057761737


 30%|██████████▍                        | 1490/5000 [5:16:56<8:51:27,  9.08s/it]


訓練次數1490，總回報257.6223880597024


 30%|██████████▏                       | 1500/5000 [5:18:53<16:05:57, 16.56s/it]


訓練次數1500，總回報62.324324324324245


 30%|██████████▎                       | 1510/5000 [5:20:46<11:52:03, 12.24s/it]


訓練次數1510，總回報263.7165680473386


 30%|██████████▎                       | 1520/5000 [5:22:37<11:55:34, 12.34s/it]


訓練次數1520，總回報533.9732441471533


 31%|██████████▍                       | 1530/5000 [5:25:06<15:54:06, 16.50s/it]


訓練次數1530，總回報381.47434944237807


 31%|██████████▍                       | 1540/5000 [5:27:36<16:54:59, 17.60s/it]


訓練次數1540，總回報69.06923076923073


 31%|██████████▌                       | 1550/5000 [5:30:40<18:42:22, 19.52s/it]


訓練次數1550，總回報449.74983498349695


 31%|██████████▌                       | 1560/5000 [5:33:37<17:04:13, 17.86s/it]


訓練次數1560，總回報393.5416107382541


 31%|██████████▋                       | 1570/5000 [5:35:45<14:07:26, 14.82s/it]


訓練次數1570，總回報132.3423728813562


 32%|██████████▋                       | 1580/5000 [5:38:42<19:02:19, 20.04s/it]


訓練次數1580，總回報349.63050847457527


 32%|██████████▊                       | 1590/5000 [5:41:34<17:14:24, 18.20s/it]


訓練次數1590，總回報599.6406593406552


 32%|██████████▉                       | 1600/5000 [5:43:57<18:21:55, 19.45s/it]


訓練次數1600，總回報363.8512820512818


 32%|██████████▉                       | 1610/5000 [5:45:44<10:54:49, 11.59s/it]


訓練次數1610，總回報338.4123287671228


 32%|███████████                       | 1620/5000 [5:48:05<15:12:07, 16.19s/it]


訓練次數1620，總回報179.4503875968998


 33%|███████████                       | 1630/5000 [5:50:33<15:27:39, 16.52s/it]


訓練次數1630，總回報724.2618296529888


 33%|███████████▏                      | 1640/5000 [5:53:21<19:18:04, 20.68s/it]


訓練次數1640，總回報293.264788732395


 33%|███████████▏                      | 1650/5000 [5:55:44<14:45:21, 15.86s/it]


訓練次數1650，總回報58.499999999999936


 33%|███████████▎                      | 1660/5000 [5:57:46<17:55:13, 19.32s/it]


訓練次數1660，總回報730.0452054794431


 33%|███████████▎                      | 1670/5000 [5:59:52<14:58:27, 16.19s/it]


訓練次數1670，總回報358.00877192982415


 34%|███████████▍                      | 1680/5000 [6:02:37<22:56:34, 24.88s/it]


訓練次數1680，總回報913.9199261992551


 34%|███████████▍                      | 1690/5000 [6:05:04<17:13:24, 18.73s/it]


訓練次數1690，總回報499.95855855855615


 34%|███████████▉                       | 1700/5000 [6:06:22<6:44:55,  7.36s/it]


訓練次數1700，總回報44.73770491803275


 34%|███████████▋                      | 1710/5000 [6:09:03<17:17:09, 18.91s/it]


訓練次數1710，總回報54.017241379310285


 34%|███████████▋                      | 1720/5000 [6:11:19<13:26:01, 14.74s/it]


訓練次數1720，總回報162.80158730158786


 35%|████████████                       | 1730/5000 [6:13:15<9:18:08, 10.24s/it]


訓練次數1730，總回報367.8147540983596


 35%|███████████▊                      | 1740/5000 [6:15:48<15:17:58, 16.90s/it]


訓練次數1740，總回報913.4739926739849


 35%|███████████▉                      | 1750/5000 [6:18:44<15:19:44, 16.98s/it]


訓練次數1750，總回報433.0297297297281


 35%|███████████▉                      | 1760/5000 [6:20:41<16:47:23, 18.66s/it]


訓練次數1760，總回報674.5153846153781


 35%|████████████                      | 1770/5000 [6:23:42<16:10:57, 18.04s/it]


訓練次數1770，總回報912.7178988326766


 36%|████████████                      | 1780/5000 [6:26:04<13:30:40, 15.11s/it]


訓練次數1780，總回報132.34346289752676


 36%|████████████▏                     | 1790/5000 [6:28:06<13:08:58, 14.75s/it]


訓練次數1790，總回報453.1619047619025


 36%|████████████▏                     | 1800/5000 [6:31:00<19:08:13, 21.53s/it]


訓練次數1800，總回報98.78257839721269


 36%|████████████▎                     | 1810/5000 [6:32:54<14:14:11, 16.07s/it]


訓練次數1810，總回報502.04032921810455


 36%|████████████▍                     | 1820/5000 [6:34:29<10:22:53, 11.75s/it]


訓練次數1820，總回報261.6317073170747


 37%|████████████▍                     | 1830/5000 [6:36:32<14:47:53, 16.81s/it]


訓練次數1830，總回報467.7551928783358


 37%|████████████▌                     | 1840/5000 [6:38:30<14:55:29, 17.00s/it]


訓練次數1840，總回報453.8667844522959


 37%|████████████▌                     | 1850/5000 [6:40:35<12:49:55, 14.67s/it]


訓練次數1850，總回報493.1504983388662


 37%|████████████▋                     | 1860/5000 [6:42:48<13:10:13, 15.10s/it]


訓練次數1860，總回報285.06283987915424


 37%|████████████▋                     | 1870/5000 [6:45:10<16:39:36, 19.16s/it]


訓練次數1870，總回報41.32558139534878


 38%|████████████▊                     | 1880/5000 [6:47:31<16:02:33, 18.51s/it]


訓練次數1880，總回報359.08493150684836


 38%|████████████▊                     | 1890/5000 [6:49:43<10:33:32, 12.22s/it]


訓練次數1890，總回報169.81901840490855


 38%|████████████▉                     | 1900/5000 [6:52:31<20:29:25, 23.80s/it]


訓練次數1900，總回報548.3956521739068


 38%|████████████▉                     | 1910/5000 [6:55:19<15:10:43, 17.68s/it]


訓練次數1910，總回報880.7809523809453


 38%|█████████████                     | 1920/5000 [6:57:42<17:11:05, 20.09s/it]


訓練次數1920，總回報875.5118644067677


 39%|█████████████                     | 1930/5000 [7:00:16<14:32:12, 17.05s/it]


訓練次數1930，總回報502.73220338982674


 39%|█████████████▏                    | 1940/5000 [7:02:54<14:14:53, 16.76s/it]


訓練次數1940，總回報182.50569395017865


 39%|█████████████▎                    | 1950/5000 [7:05:10<10:45:45, 12.70s/it]


訓練次數1950，總回報61.72657342657333


 39%|█████████████▎                    | 1960/5000 [7:07:11<13:17:55, 15.75s/it]


訓練次數1960，總回報777.237254901951


 39%|█████████████▍                    | 1970/5000 [7:09:13<13:51:15, 16.46s/it]


訓練次數1970，總回報362.3220216606484


 40%|█████████████▍                    | 1980/5000 [7:11:36<13:43:40, 16.36s/it]


訓練次數1980，總回報281.6014084507051


 40%|█████████████▌                    | 1990/5000 [7:14:03<11:48:58, 14.13s/it]


訓練次數1990，總回報745.4362416107291


 40%|█████████████▌                    | 2000/5000 [7:16:49<18:10:06, 21.80s/it]


訓練次數2000，總回報276.3501858736063


 40%|█████████████▋                    | 2010/5000 [7:19:27<16:24:36, 19.76s/it]


訓練次數2010，總回報641.4181818181744


 40%|█████████████▋                    | 2020/5000 [7:21:53<14:15:53, 17.23s/it]


訓練次數2020，總回報797.2705882352839


 41%|█████████████▊                    | 2030/5000 [7:24:53<15:01:15, 18.21s/it]


訓練次數2030，總回報388.92682926829224


 41%|█████████████▊                    | 2040/5000 [7:27:37<18:10:36, 22.11s/it]


訓練次數2040，總回報566.5542168674647


 41%|█████████████▉                    | 2050/5000 [7:30:47<14:00:00, 17.08s/it]


訓練次數2050，總回報474.37184466019016


 41%|██████████████                    | 2060/5000 [7:33:29<16:36:51, 20.34s/it]


訓練次數2060，總回報484.5570469798638


 41%|██████████████                    | 2070/5000 [7:36:14<17:11:03, 21.11s/it]


訓練次數2070，總回報383.98062015503854


 42%|██████████████▏                   | 2080/5000 [7:39:00<11:55:28, 14.70s/it]


訓練次數2080，總回報53.86849315068486


 42%|██████████████▏                   | 2090/5000 [7:41:38<15:55:48, 19.71s/it]


訓練次數2090，總回報355.8769230769218


 42%|██████████████▎                   | 2100/5000 [7:44:10<11:58:10, 14.86s/it]


訓練次數2100，總回報570.5999999999942


 42%|██████████████▎                   | 2110/5000 [7:47:23<19:01:36, 23.70s/it]


訓練次數2110，總回報904.9333333333255


 42%|██████████████▍                   | 2120/5000 [7:50:16<19:01:21, 23.78s/it]


訓練次數2120，總回報562.3818181818125


 43%|██████████████▉                    | 2130/5000 [7:52:20<8:51:48, 11.12s/it]


訓練次數2130，總回報122.1348837209305


 43%|██████████████▌                   | 2140/5000 [7:55:10<18:59:58, 23.92s/it]


訓練次數2140，總回報583.5779661016898


 43%|██████████████▌                   | 2150/5000 [7:57:56<14:11:43, 17.93s/it]


訓練次數2150，總回報500.95667870035595


 43%|██████████████▋                   | 2160/5000 [8:00:44<15:17:44, 19.39s/it]


訓練次數2160，總回報503.73802816901156


 43%|██████████████▊                   | 2170/5000 [8:03:14<14:27:21, 18.39s/it]


訓練次數2170，總回報108.66125461254641


 44%|██████████████▊                   | 2180/5000 [8:05:29<12:55:18, 16.50s/it]


訓練次數2180，總回報395.5755102040807


 44%|██████████████▉                   | 2190/5000 [8:08:16<14:02:46, 18.00s/it]


訓練次數2190，總回報486.48247422680174


 44%|██████████████▉                   | 2200/5000 [8:11:29<16:50:03, 21.64s/it]


訓練次數2200，總回報466.77944250870735


 44%|███████████████                   | 2210/5000 [8:14:01<14:36:59, 18.86s/it]


訓練次數2210，總回報488.53188405796925


 44%|███████████████                   | 2220/5000 [8:16:36<13:04:20, 16.93s/it]


訓練次數2220，總回報887.155223880586


 45%|███████████████▏                  | 2230/5000 [8:19:04<12:31:44, 16.28s/it]


訓練次數2230，總回報618.6425087107953


 45%|███████████████▏                  | 2240/5000 [8:21:38<12:57:10, 16.90s/it]


訓練次數2240，總回報910.9659932659797


 45%|███████████████▎                  | 2250/5000 [8:23:58<12:10:52, 15.95s/it]


訓練次數2250，總回報597.5822695035423


 45%|███████████████▊                   | 2260/5000 [8:25:50<9:50:44, 12.94s/it]


訓練次數2260，總回報55.406249999999915


 45%|███████████████▍                  | 2270/5000 [8:28:09<11:04:21, 14.60s/it]


訓練次數2270，總回報346.62247191011164


 46%|███████████████▌                  | 2280/5000 [8:30:26<13:39:26, 18.08s/it]


訓練次數2280，總回報138.1220338983054


 46%|████████████████                   | 2290/5000 [8:32:10<8:31:22, 11.32s/it]


訓練次數2290，總回報310.5053231939167


 46%|████████████████                   | 2300/5000 [8:33:43<7:12:37,  9.61s/it]


訓練次數2300，總回報278.30322580645174


 46%|████████████████▏                  | 2310/5000 [8:35:30<9:53:00, 13.23s/it]


訓練次數2310，總回報344.6153846153847


 46%|███████████████▊                  | 2320/5000 [8:38:07<13:52:16, 18.63s/it]


訓練次數2320，總回報428.80273037542304


 47%|████████████████▎                  | 2330/5000 [8:39:41<8:22:20, 11.29s/it]


訓練次數2330，總回報652.054430379742


 47%|████████████████▍                  | 2340/5000 [8:41:47<9:06:56, 12.34s/it]


訓練次數2340，總回報879.3925373134217


 47%|███████████████▉                  | 2350/5000 [8:43:50<10:33:55, 14.35s/it]


訓練次數2350，總回報914.3069930069809


 47%|████████████████                  | 2360/5000 [8:45:55<14:00:56, 19.11s/it]


訓練次數2360，總回報701.1508833922198


 47%|████████████████                  | 2370/5000 [8:47:32<10:22:06, 14.19s/it]


訓練次數2370，總回報215.99124579124697


 48%|████████████████▋                  | 2380/5000 [8:49:23<8:50:48, 12.16s/it]


訓練次數2380，總回報607.1971962616755


 48%|████████████████▎                 | 2390/5000 [8:51:54<16:09:02, 22.28s/it]


訓練次數2390，總回報911.1795847750801


 48%|████████████████▊                  | 2400/5000 [8:53:32<6:48:05,  9.42s/it]


訓練次數2400，總回報134.02957746478907


 48%|████████████████▍                 | 2410/5000 [8:55:45<11:55:54, 16.58s/it]


訓練次數2410，總回報683.8373134328284


 48%|████████████████▉                  | 2420/5000 [8:57:40<9:01:27, 12.59s/it]


訓練次數2420，總回報413.57364620938415


 49%|████████████████▌                 | 2430/5000 [8:59:45<10:57:33, 15.35s/it]


訓練次數2430，總回報544.651351351347


 49%|█████████████████                  | 2440/5000 [9:01:57<9:38:36, 13.56s/it]


訓練次數2440，總回報49.71081081081074


 49%|████████████████▋                 | 2450/5000 [9:04:49<14:59:54, 21.17s/it]


訓練次數2450，總回報651.4605095541301


 49%|████████████████▋                 | 2460/5000 [9:07:20<11:48:02, 16.73s/it]


訓練次數2460，總回報415.0836734693867


 49%|█████████████████▎                 | 2470/5000 [9:09:05<7:53:44, 11.23s/it]


訓練次數2470，總回報147.03893129771032


 50%|████████████████▊                 | 2480/5000 [9:11:53<15:57:32, 22.80s/it]


訓練次數2480，總回報898.6313588850035


 50%|████████████████▉                 | 2490/5000 [9:14:13<13:35:07, 19.48s/it]


訓練次數2490，總回報381.0743494423781


 50%|█████████████████                 | 2500/5000 [9:16:54<13:30:43, 19.46s/it]


訓練次數2500，總回報916.3797833934976


 50%|█████████████████▌                 | 2510/5000 [9:19:27<9:03:29, 13.10s/it]


訓練次數2510，總回報126.88141263940551


 50%|█████████████████▏                | 2520/5000 [9:22:16<12:57:09, 18.80s/it]


訓練次數2520，總回報912.0271477663088


 51%|█████████████████▏                | 2530/5000 [9:25:21<16:00:00, 23.32s/it]


訓練次數2530，總回報-94.999999999999


 51%|█████████████████▎                | 2540/5000 [9:27:49<10:05:05, 14.76s/it]


訓練次數2540，總回報521.060317460315


 51%|█████████████████▎                | 2550/5000 [9:30:06<10:31:02, 15.45s/it]


訓練次數2550，總回報403.50872483221383


 51%|█████████████████▉                 | 2560/5000 [9:32:09<8:06:35, 11.97s/it]


訓練次數2560，總回報660.0729729729654


 51%|█████████████████▍                | 2570/5000 [9:34:18<11:37:40, 17.23s/it]


訓練次數2570，總回報515.0768953068552


 52%|█████████████████▌                | 2580/5000 [9:36:31<10:16:17, 15.28s/it]


訓練次數2580，總回報918.7664122137339


 52%|█████████████████▌                | 2590/5000 [9:38:54<14:32:55, 21.73s/it]


訓練次數2590，總回報538.7707395498336


 52%|█████████████████▋                | 2600/5000 [9:41:20<12:16:47, 18.42s/it]


訓練次數2600，總回報877.4901840490721


 52%|██████████████████▎                | 2610/5000 [9:42:57<7:35:57, 11.45s/it]


訓練次數2610，總回報917.6536231884002


 52%|█████████████████▊                | 2620/5000 [9:45:23<11:27:13, 17.32s/it]


訓練次數2620，總回報106.50602409638586


 53%|█████████████████▉                | 2630/5000 [9:47:35<10:55:32, 16.60s/it]


訓練次數2630，總回報911.7577464788583


 53%|█████████████████▉                | 2640/5000 [9:49:49<11:40:10, 17.80s/it]


訓練次數2640，總回報377.26074766355043


 53%|██████████████████▌                | 2650/5000 [9:51:48<7:26:39, 11.40s/it]


訓練次數2650，總回報155.65117845117896


 53%|██████████████████▌                | 2660/5000 [9:53:33<6:40:56, 10.28s/it]


訓練次數2660，總回報243.53127413127538


 53%|██████████████████▏               | 2670/5000 [9:55:38<10:21:52, 16.01s/it]


訓練次數2670，總回報884.646808510631


 54%|██████████████████▏               | 2680/5000 [9:58:26<12:11:27, 18.92s/it]


訓練次數2680，總回報905.5993399339856


 54%|█████████████████▊               | 2690/5000 [10:00:56<11:29:58, 17.92s/it]


訓練次數2690，總回報412.33745173744927


 54%|█████████████████▊               | 2700/5000 [10:03:27<11:51:11, 18.55s/it]


訓練次數2700，總回報911.703448275849


 54%|█████████████████▉               | 2710/5000 [10:05:37<10:29:06, 16.48s/it]


訓練次數2710，總回報915.377992277978


 54%|█████████████████▉               | 2720/5000 [10:08:42<15:17:30, 24.14s/it]


訓練次數2720，總回報757.0666666666608


 55%|██████████████████               | 2730/5000 [10:11:10<13:18:06, 21.10s/it]


訓練次數2730，總回報859.6827794561748


 55%|██████████████████               | 2740/5000 [10:13:54<13:05:59, 20.87s/it]


訓練次數2740，總回報662.4421052631496


 55%|██████████████████▏              | 2750/5000 [10:16:18<12:18:43, 19.70s/it]


訓練次數2750，總回報755.8771929824428


 55%|██████████████████▊               | 2760/5000 [10:18:16<7:57:30, 12.79s/it]


訓練次數2760，總回報559.520274914085


 55%|██████████████████▎              | 2770/5000 [10:20:47<13:01:12, 21.02s/it]


訓練次數2770，總回報-94.99999999999898


 56%|██████████████████▉               | 2780/5000 [10:23:21<9:48:51, 15.91s/it]


訓練次數2780，總回報103.60133779264237


 56%|██████████████████▍              | 2790/5000 [10:25:37<12:03:09, 19.63s/it]


訓練次數2790，總回報895.6654135338218


 56%|██████████████████▍              | 2800/5000 [10:28:17<10:31:58, 17.24s/it]


訓練次數2800，總回報885.0451612903128


 56%|██████████████████▌              | 2810/5000 [10:30:46<10:30:02, 17.26s/it]


訓練次數2810，總回報346.1153846153845


 56%|███████████████████▏              | 2820/5000 [10:32:25<8:18:51, 13.73s/it]


訓練次數2820，總回報511.7575757575677


 57%|██████████████████▋              | 2830/5000 [10:35:35<12:25:47, 20.62s/it]


訓練次數2830，總回報751.4999999999942


 57%|██████████████████▋              | 2840/5000 [10:37:44<10:04:20, 16.79s/it]


訓練次數2840，總回報461.19679715302266


 57%|███████████████████▍              | 2850/5000 [10:39:45<8:56:13, 14.96s/it]


訓練次數2850，總回報256.03414634146486


 57%|███████████████████▍              | 2860/5000 [10:41:40<8:04:01, 13.57s/it]


訓練次數2860，總回報122.72140221402249


 57%|███████████████████▌              | 2870/5000 [10:43:35<6:45:43, 11.43s/it]


訓練次數2870，總回報592.1226765799215


 58%|███████████████████▌              | 2880/5000 [10:45:35<8:21:35, 14.20s/it]


訓練次數2880，總回報186.18628158844803


 58%|███████████████████▋              | 2890/5000 [10:47:45<8:20:47, 14.24s/it]


訓練次數2890，總回報448.43223140495587


 58%|███████████████████▋              | 2900/5000 [10:49:28<6:06:36, 10.47s/it]


訓練次數2900，總回報566.3942122186439


 58%|███████████████████▏             | 2910/5000 [10:52:13<12:34:03, 21.65s/it]


訓練次數2910，總回報299.57831325299964


 58%|███████████████████▊              | 2920/5000 [10:54:45<9:26:37, 16.35s/it]


訓練次數2920，總回報563.4496644295264


 59%|███████████████████▉              | 2930/5000 [10:56:52<8:47:54, 15.30s/it]


訓練次數2930，總回報535.6307167235436


 59%|███████████████████▍             | 2940/5000 [11:00:00<11:05:01, 19.37s/it]


訓練次數2940，總回報903.3110367892803


 59%|████████████████████              | 2950/5000 [11:02:52<9:05:20, 15.96s/it]


訓練次數2950，總回報910.99259259258


 59%|████████████████████▏             | 2960/5000 [11:05:13<8:45:26, 15.45s/it]


訓練次數2960，總回報882.9107023411218


 59%|███████████████████▌             | 2970/5000 [11:07:49<12:44:07, 22.58s/it]


訓練次數2970，總回報751.4101910827883


 60%|████████████████████▎             | 2980/5000 [11:10:14<8:13:43, 14.67s/it]


訓練次數2980，總回報398.2671641791023


 60%|████████████████████▎             | 2990/5000 [11:12:47<8:55:01, 15.97s/it]


訓練次數2990，總回報661.9432835820799


 60%|████████████████████▍             | 3000/5000 [11:14:43<7:15:08, 13.05s/it]


訓練次數3000，總回報329.3539682539678


 60%|████████████████████▍             | 3010/5000 [11:16:45<9:41:59, 17.55s/it]


訓練次數3010，總回報348.4373088685012


 60%|███████████████████▉             | 3020/5000 [11:19:15<10:52:21, 19.77s/it]


訓練次數3020，總回報428.53833865814534


 61%|████████████████████▌             | 3030/5000 [11:21:14<7:15:55, 13.28s/it]


訓練次數3030，總回報660.9636363636298


 61%|████████████████████▋             | 3040/5000 [11:23:10<6:27:56, 11.88s/it]


訓練次數3040，總回報139.03511705685682


 61%|████████████████████▋             | 3050/5000 [11:25:11<6:22:09, 11.76s/it]


訓練次數3050，總回報101.67037037037052


 61%|████████████████████▊             | 3060/5000 [11:27:42<8:56:52, 16.60s/it]


訓練次數3060，總回報740.3537102473422


 61%|████████████████████▉             | 3070/5000 [11:30:22<9:18:34, 17.37s/it]


訓練次數3070，總回報912.4271477663086


 62%|████████████████████▎            | 3080/5000 [11:32:55<10:55:26, 20.48s/it]


訓練次數3080，總回報671.8856677524315


 62%|████████████████████▍            | 3090/5000 [11:35:28<10:03:53, 18.97s/it]


訓練次數3090，總回報225.0722222222234


 62%|█████████████████████             | 3100/5000 [11:37:16<7:35:48, 14.39s/it]


訓練次數3100，總回報111.47894736842126


 62%|█████████████████████▏            | 3110/5000 [11:39:22<7:35:03, 14.45s/it]


訓練次數3110，總回報918.7779922779788


 62%|█████████████████████▏            | 3120/5000 [11:41:38<7:41:03, 14.71s/it]


訓練次數3120，總回報390.6344827586184


 63%|█████████████████████▎            | 3130/5000 [11:43:42<5:29:07, 10.56s/it]


訓練次數3130，總回報496.0019762845833


 63%|█████████████████████▎            | 3140/5000 [11:46:10<6:41:57, 12.97s/it]


訓練次數3140，總回報234.0140468227438


 63%|█████████████████████▍            | 3150/5000 [11:48:37<9:27:24, 18.40s/it]


訓練次數3150，總回報717.8631578947271


 63%|█████████████████████▍            | 3160/5000 [11:50:39<7:53:33, 15.44s/it]


訓練次數3160，總回報377.356877323419


 63%|█████████████████████▌            | 3170/5000 [11:52:51<7:41:36, 15.13s/it]


訓練次數3170，總回報428.65135135134943


 64%|█████████████████████▌            | 3180/5000 [11:55:24<8:34:06, 16.95s/it]


訓練次數3180，總回報403.3381877022629


 64%|█████████████████████            | 3190/5000 [11:57:57<10:58:40, 21.83s/it]


訓練次數3190，總回報598.0203821655962


 64%|█████████████████████▊            | 3200/5000 [11:59:55<5:53:59, 11.80s/it]


訓練次數3200，總回報185.7058823529418


 64%|█████████████████████▊            | 3210/5000 [12:02:49<8:57:09, 18.01s/it]


訓練次數3210，總回報907.4064935064775


 64%|█████████████████████▉            | 3220/5000 [12:05:31<8:58:25, 18.15s/it]


訓練次數3220，總回報576.617993079582


 65%|█████████████████████▉            | 3230/5000 [12:08:11<9:13:22, 18.76s/it]


訓練次數3230，總回報658.2262295081883


 65%|██████████████████████            | 3240/5000 [12:09:52<5:30:20, 11.26s/it]


訓練次數3240，總回報763.6643356643262


 65%|██████████████████████            | 3250/5000 [12:11:59<7:44:57, 15.94s/it]


訓練次數3250，總回報429.4273615635162


 65%|██████████████████████▏           | 3260/5000 [12:13:51<5:29:57, 11.38s/it]


訓練次數3260，總回報345.35209003215317


 65%|██████████████████████▏           | 3270/5000 [12:15:55<6:29:24, 13.51s/it]


訓練次數3270，總回報233.51134751773176


 66%|██████████████████████▎           | 3280/5000 [12:18:15<8:02:57, 16.85s/it]


訓練次數3280，總回報624.9024767801811


 66%|██████████████████████▎           | 3290/5000 [12:20:51<9:27:54, 19.93s/it]


訓練次數3290，總回報906.7178988326756


 66%|██████████████████████▍           | 3300/5000 [12:23:03<7:00:16, 14.83s/it]


訓練次數3300，總回報401.30860927152224


 66%|██████████████████████▌           | 3310/5000 [12:25:15<7:44:45, 16.50s/it]


訓練次數3310，總回報918.3954372623427


 66%|██████████████████████▌           | 3320/5000 [12:27:37<7:18:49, 15.67s/it]


訓練次數3320，總回報682.4747474747389


 67%|██████████████████████▋           | 3330/5000 [12:29:19<6:56:32, 14.97s/it]


訓練次數3330，總回報544.484210526311


 67%|██████████████████████▋           | 3340/5000 [12:31:36<6:56:11, 15.04s/it]


訓練次數3340，總回報350.11038961038963


 67%|██████████████████████▊           | 3350/5000 [12:33:41<6:38:24, 14.49s/it]


訓練次數3350，總回報547.185977859776


 67%|██████████████████████▊           | 3360/5000 [12:35:08<4:33:36, 10.01s/it]


訓練次數3360，總回報363.2052631578932


 67%|██████████████████████▉           | 3370/5000 [12:37:21<6:01:37, 13.31s/it]


訓練次數3370，總回報301.0346938775512


 68%|██████████████████████▉           | 3380/5000 [12:38:50<5:12:52, 11.59s/it]


訓練次數3380，總回報430.614478114475


 68%|███████████████████████           | 3390/5000 [12:40:13<3:59:53,  8.94s/it]


訓練次數3390，總回報56.48181818181808


 68%|███████████████████████           | 3400/5000 [12:42:17<5:50:41, 13.15s/it]


訓練次數3400，總回報425.7986754966875


 68%|███████████████████████▏          | 3410/5000 [12:44:25<5:28:44, 12.41s/it]


訓練次數3410，總回報599.17382550335


 68%|███████████████████████▎          | 3420/5000 [12:47:31<9:42:34, 22.12s/it]


訓練次數3420，總回報480.6135593220308


 69%|███████████████████████▎          | 3430/5000 [12:49:42<8:21:26, 19.16s/it]


訓練次數3430，總回報867.9438127090161


 69%|███████████████████████▍          | 3440/5000 [12:51:35<6:35:04, 15.20s/it]


訓練次數3440，總回報855.8700389104993


 69%|███████████████████████▍          | 3450/5000 [12:54:06<9:23:51, 21.83s/it]


訓練次數3450，總回報911.3739926739844


 69%|███████████████████████▌          | 3460/5000 [12:56:30<6:47:21, 15.87s/it]


訓練次數3460，總回報45.544055944055884


 69%|███████████████████████▌          | 3470/5000 [12:58:48<8:42:54, 20.51s/it]


訓練次數3470，總回報564.7571428571376


 70%|███████████████████████▋          | 3480/5000 [13:01:02<5:36:42, 13.29s/it]


訓練次數3480，總回報286.7773993808051


 70%|███████████████████████▋          | 3490/5000 [13:03:08<6:15:33, 14.92s/it]


訓練次數3490，總回報885.1872180451006


 70%|███████████████████████▊          | 3500/5000 [13:05:24<8:38:07, 20.73s/it]


訓練次數3500，總回報907.7315412186264


 70%|███████████████████████▊          | 3510/5000 [13:07:37<6:23:25, 15.44s/it]


訓練次數3510，總回報910.3740614334313


 70%|███████████████████████▉          | 3520/5000 [13:09:21<5:49:10, 14.16s/it]


訓練次數3520，總回報100.04137931034504


 71%|████████████████████████          | 3530/5000 [13:11:42<5:29:42, 13.46s/it]


訓練次數3530，總回報204.76701846965747


 71%|████████████████████████          | 3540/5000 [13:14:06<6:39:41, 16.43s/it]


訓練次數3540，總回報360.26225165562835


 71%|████████████████████████▏         | 3550/5000 [13:16:42<6:32:02, 16.22s/it]


訓練次數3550，總回報528.6967637540391


 71%|████████████████████████▏         | 3560/5000 [13:18:40<6:24:22, 16.02s/it]


訓練次數3560，總回報796.3709677419287


 71%|████████████████████████▎         | 3570/5000 [13:21:21<6:32:01, 16.45s/it]


訓練次數3570，總回報403.91403508771657


 72%|████████████████████████▎         | 3580/5000 [13:22:57<3:04:29,  7.80s/it]


訓練次數3580，總回報46.139501779359364


 72%|████████████████████████▍         | 3590/5000 [13:25:04<6:21:23, 16.23s/it]


訓練次數3590，總回報163.10566037735902


 72%|████████████████████████▍         | 3600/5000 [13:27:33<7:55:21, 20.37s/it]


訓練次數3600，總回報377.75839416058375


 72%|████████████████████████▌         | 3610/5000 [13:30:14<6:08:58, 15.93s/it]


訓練次數3610，總回報593.3588235294094


 72%|████████████████████████▌         | 3620/5000 [13:32:23<6:05:00, 15.87s/it]


訓練次數3620，總回報236.92673267326802


 73%|████████████████████████▋         | 3630/5000 [13:34:37<6:27:43, 16.98s/it]


訓練次數3630，總回報444.71356466876586


 73%|████████████████████████▊         | 3640/5000 [13:36:14<3:25:38,  9.07s/it]


訓練次數3640，總回報493.6787878787863


 73%|████████████████████████▊         | 3650/5000 [13:38:16<4:37:21, 12.33s/it]


訓練次數3650，總回報141.47183098591586


 73%|████████████████████████▉         | 3660/5000 [13:40:28<5:16:52, 14.19s/it]


訓練次數3660，總回報491.7333333333288


 73%|████████████████████████▉         | 3670/5000 [13:42:57<6:55:54, 18.76s/it]


訓練次數3670，總回報333.7682847896431


 74%|█████████████████████████         | 3680/5000 [13:45:13<5:29:02, 14.96s/it]


訓練次數3680，總回報472.6338983050814


 74%|█████████████████████████         | 3690/5000 [13:47:41<5:29:28, 15.09s/it]


訓練次數3690，總回報465.49368029739526


 74%|█████████████████████████▏        | 3700/5000 [13:50:29<7:59:02, 22.11s/it]


訓練次數3700，總回報731.221052631569


 74%|█████████████████████████▏        | 3710/5000 [13:52:35<4:33:14, 12.71s/it]


訓練次數3710，總回報709.2775577557712


 74%|█████████████████████████▎        | 3720/5000 [13:54:29<4:06:42, 11.56s/it]


訓練次數3720，總回報383.8576642335762


 75%|█████████████████████████▎        | 3730/5000 [13:56:33<4:49:13, 13.66s/it]


訓練次數3730，總回報814.9745098039102


 75%|█████████████████████████▍        | 3740/5000 [13:58:41<5:53:26, 16.83s/it]


訓練次數3740，總回報282.4807947019871


 75%|█████████████████████████▌        | 3750/5000 [14:01:24<6:41:09, 19.26s/it]


訓練次數3750，總回報883.4126984126923


 75%|█████████████████████████▌        | 3760/5000 [14:03:24<5:17:45, 15.38s/it]


訓練次數3760，總回報915.9271477663091


 75%|█████████████████████████▋        | 3770/5000 [14:05:49<6:13:30, 18.22s/it]


訓練次數3770，總回報368.5909090909088


 76%|█████████████████████████▋        | 3780/5000 [14:07:35<4:11:44, 12.38s/it]


訓練次數3780，總回報914.9797833934973


 76%|█████████████████████████▊        | 3790/5000 [14:09:56<5:26:21, 16.18s/it]


訓練次數3790，總回報886.7877551020341


 76%|█████████████████████████▊        | 3800/5000 [14:12:15<3:38:42, 10.94s/it]


訓練次數3800，總回報45.34912891986054


 76%|█████████████████████████▉        | 3810/5000 [14:14:33<5:59:37, 18.13s/it]


訓練次數3810，總回報911.7373134328228


 76%|█████████████████████████▉        | 3820/5000 [14:16:44<5:09:54, 15.76s/it]


訓練次數3820，總回報172.45776397515607


 77%|██████████████████████████        | 3830/5000 [14:19:01<5:18:18, 16.32s/it]


訓練次數3830，總回報409.3564564564559


 77%|██████████████████████████        | 3840/5000 [14:21:07<3:28:49, 10.80s/it]


訓練次數3840，總回報570.2081272084778


 77%|██████████████████████████▏       | 3850/5000 [14:23:19<4:06:32, 12.86s/it]


訓練次數3850，總回報78.85882352941172


 77%|██████████████████████████▏       | 3860/5000 [14:25:06<4:58:16, 15.70s/it]


訓練次數3860，總回報656.7857142857101


 77%|██████████████████████████▎       | 3870/5000 [14:27:49<6:25:16, 20.46s/it]


訓練次數3870，總回報807.0172910662657


 78%|██████████████████████████▍       | 3880/5000 [14:29:51<5:07:30, 16.47s/it]


訓練次數3880，總回報567.4345195729491


 78%|██████████████████████████▍       | 3890/5000 [14:31:44<3:37:28, 11.76s/it]


訓練次數3890，總回報50.0365019011406


 78%|██████████████████████████▌       | 3900/5000 [14:33:21<3:08:57, 10.31s/it]


訓練次數3900，總回報286.1891566265068


 78%|██████████████████████████▌       | 3910/5000 [14:35:04<4:29:17, 14.82s/it]


訓練次數3910，總回報876.0850340135979


 78%|██████████████████████████▋       | 3920/5000 [14:36:57<4:11:55, 14.00s/it]


訓練次數3920，總回報762.4333333333263


 79%|██████████████████████████▋       | 3930/5000 [14:38:39<3:47:00, 12.73s/it]


訓練次數3930，總回報298.02934131736555


 79%|██████████████████████████▊       | 3940/5000 [14:40:38<3:56:25, 13.38s/it]


訓練次數3940，總回報851.9535836177315


 79%|██████████████████████████▊       | 3950/5000 [14:43:00<4:01:05, 13.78s/it]


訓練次數3950，總回報414.4459930313569


 79%|██████████████████████████▉       | 3960/5000 [14:45:06<4:48:56, 16.67s/it]


訓練次數3960，總回報369.65631399317215


 79%|██████████████████████████▉       | 3970/5000 [14:47:25<4:58:00, 17.36s/it]


訓練次數3970，總回報285.66531986532044


 80%|███████████████████████████       | 3980/5000 [14:49:56<4:58:03, 17.53s/it]


訓練次數3980，總回報748.942465753413


 80%|███████████████████████████▏      | 3990/5000 [14:52:06<3:43:55, 13.30s/it]


訓練次數3990，總回報486.35641025640933


 80%|███████████████████████████▏      | 4000/5000 [14:53:53<4:10:17, 15.02s/it]


訓練次數4000，總回報721.0383177569967


 80%|███████████████████████████▎      | 4010/5000 [14:56:02<3:50:56, 14.00s/it]


訓練次數4010，總回報639.4243243243178


 80%|███████████████████████████▎      | 4020/5000 [14:57:56<4:22:46, 16.09s/it]


訓練次數4020，總回報493.1453874538728


 81%|███████████████████████████▍      | 4030/5000 [15:00:24<4:01:15, 14.92s/it]


訓練次數4030，總回報128.12631578947392


 81%|███████████████████████████▍      | 4040/5000 [15:03:00<4:14:50, 15.93s/it]


訓練次數4040，總回報338.32637362637314


 81%|███████████████████████████▌      | 4050/5000 [15:05:17<3:42:46, 14.07s/it]


訓練次數4050，總回報275.2772455089828


 81%|███████████████████████████▌      | 4060/5000 [15:07:36<4:15:17, 16.30s/it]


訓練次數4060，總回報909.3432432432269


 81%|███████████████████████████▋      | 4070/5000 [15:09:48<3:46:59, 14.64s/it]


訓練次數4070，總回報392.6985915492952


 82%|███████████████████████████▋      | 4080/5000 [15:11:42<3:14:06, 12.66s/it]


訓練次數4080，總回報668.7747252747212


 82%|███████████████████████████▊      | 4090/5000 [15:13:59<3:57:56, 15.69s/it]


訓練次數4090，總回報909.5797833934969


 82%|███████████████████████████▉      | 4100/5000 [15:16:32<4:15:18, 17.02s/it]


訓練次數4100，總回報815.4192982456029


 82%|███████████████████████████▉      | 4110/5000 [15:18:56<4:42:09, 19.02s/it]


訓練次數4110，總回報592.9377049180276


 82%|████████████████████████████      | 4120/5000 [15:21:30<3:58:13, 16.24s/it]


訓練次數4120，總回報596.3194945848337


 83%|████████████████████████████      | 4130/5000 [15:24:35<5:00:54, 20.75s/it]


訓練次數4130，總回報869.4382671480105


 83%|████████████████████████████▏     | 4140/5000 [15:27:18<4:48:19, 20.12s/it]


訓練次數4140，總回報903.5640522875628


 83%|████████████████████████████▏     | 4150/5000 [15:28:59<2:39:06, 11.23s/it]


訓練次數4150，總回報362.453731343282


 83%|████████████████████████████▎     | 4160/5000 [15:30:51<3:17:27, 14.10s/it]


訓練次數4160，總回報82.26543209876549


 83%|████████████████████████████▎     | 4170/5000 [15:33:58<5:08:19, 22.29s/it]


訓練次數4170，總回報733.7756756756661


 84%|████████████████████████████▍     | 4180/5000 [15:36:21<3:45:24, 16.49s/it]


訓練次數4180，總回報499.4698630136961


 84%|████████████████████████████▍     | 4190/5000 [15:38:49<4:10:43, 18.57s/it]


訓練次數4190，總回報610.2571428571365


 84%|████████████████████████████▌     | 4200/5000 [15:40:56<3:35:37, 16.17s/it]


訓練次數4200，總回報558.0189964157667


 84%|████████████████████████████▋     | 4210/5000 [15:43:34<4:02:26, 18.41s/it]


訓練次數4210，總回報288.88139534883766


 84%|████████████████████████████▋     | 4220/5000 [15:46:11<3:40:19, 16.95s/it]


訓練次數4220，總回報460.2461538461523


 85%|████████████████████████████▊     | 4230/5000 [15:48:03<2:27:08, 11.47s/it]


訓練次數4230，總回報101.94761904761914


 85%|████████████████████████████▊     | 4240/5000 [15:49:57<3:09:11, 14.94s/it]


訓練次數4240，總回報525.859075907586


 85%|████████████████████████████▉     | 4250/5000 [15:51:51<2:45:24, 13.23s/it]


訓練次數4250，總回報111.89640287769802


 85%|████████████████████████████▉     | 4260/5000 [15:54:45<4:09:15, 20.21s/it]


訓練次數4260，總回報911.4739926739845


 85%|█████████████████████████████     | 4270/5000 [15:56:37<2:54:08, 14.31s/it]


訓練次數4270，總回報668.890476190472


 86%|█████████████████████████████     | 4280/5000 [15:59:53<4:28:03, 22.34s/it]


訓練次數4280，總回報861.6563467492158


 86%|█████████████████████████████▏    | 4290/5000 [16:02:02<3:02:51, 15.45s/it]


訓練次數4290，總回報910.404347826075


 86%|█████████████████████████████▏    | 4300/5000 [16:04:18<3:41:21, 18.97s/it]


訓練次數4300，總回報558.3052980132412


 86%|█████████████████████████████▎    | 4310/5000 [16:06:58<2:52:13, 14.98s/it]


訓練次數4310，總回報889.8862068965414


 86%|█████████████████████████████▍    | 4320/5000 [16:09:05<3:08:28, 16.63s/it]


訓練次數4320，總回報913.6885906040135


 87%|█████████████████████████████▍    | 4330/5000 [16:11:21<3:11:45, 17.17s/it]


訓練次數4330，總回報336.2045307443357


 87%|█████████████████████████████▌    | 4340/5000 [16:14:01<3:38:33, 19.87s/it]


訓練次數4340，總回報528.5926174496615


 87%|█████████████████████████████▌    | 4350/5000 [16:16:12<1:52:00, 10.34s/it]


訓練次數4350，總回報63.91196013289027


 87%|█████████████████████████████▋    | 4360/5000 [16:18:50<2:36:17, 14.65s/it]


訓練次數4360，總回報766.7908794788154


 87%|█████████████████████████████▋    | 4370/5000 [16:21:45<2:51:25, 16.33s/it]


訓練次數4370，總回報423.22802768165985


 88%|█████████████████████████████▊    | 4380/5000 [16:23:34<2:33:14, 14.83s/it]


訓練次數4380，總回報258.46213592233084


 88%|█████████████████████████████▊    | 4390/5000 [16:25:40<2:45:16, 16.26s/it]


訓練次數4390，總回報428.0029197080283


 88%|█████████████████████████████▉    | 4400/5000 [16:27:36<2:29:46, 14.98s/it]


訓練次數4400，總回報612.1680272108814


 88%|█████████████████████████████▉    | 4410/5000 [16:30:14<3:13:01, 19.63s/it]


訓練次數4410，總回報242.27627627627786


 88%|██████████████████████████████    | 4420/5000 [16:33:16<3:18:37, 20.55s/it]


訓練次數4420，總回報353.34262295081885


 89%|██████████████████████████████    | 4430/5000 [16:35:22<2:17:42, 14.50s/it]


訓練次數4430，總回報916.1272727272606


 89%|██████████████████████████████▏   | 4440/5000 [16:37:25<2:22:02, 15.22s/it]


訓練次數4440，總回報375.47010676156515


 89%|██████████████████████████████▎   | 4450/5000 [16:39:57<2:49:06, 18.45s/it]


訓練次數4450，總回報389.25326460480983


 89%|██████████████████████████████▎   | 4460/5000 [16:41:53<1:36:51, 10.76s/it]


訓練次數4460，總回報290.612893982809


 89%|██████████████████████████████▍   | 4470/5000 [16:44:24<1:53:38, 12.86s/it]


訓練次數4470，總回報653.9273381294885


 90%|██████████████████████████████▍   | 4480/5000 [16:46:11<1:43:45, 11.97s/it]


訓練次數4480，總回報378.17213114754


 90%|██████████████████████████████▌   | 4490/5000 [16:48:09<2:26:23, 17.22s/it]


訓練次數4490，總回報886.1984732824372


 90%|██████████████████████████████▌   | 4500/5000 [16:50:19<2:40:58, 19.32s/it]


訓練次數4500，總回報919.866412213734


 90%|██████████████████████████████▋   | 4510/5000 [16:53:07<2:16:40, 16.74s/it]


訓練次數4510，總回報209.77142857142965


 90%|██████████████████████████████▋   | 4520/5000 [16:55:28<2:20:41, 17.59s/it]


訓練次數4520，總回報879.0207885304574


 91%|██████████████████████████████▊   | 4530/5000 [16:57:21<1:24:24, 10.78s/it]


訓練次數4530，總回報110.21626016260178


 91%|██████████████████████████████▊   | 4540/5000 [16:59:45<1:49:43, 14.31s/it]


訓練次數4540，總回報878.1787003610054


 91%|██████████████████████████████▉   | 4550/5000 [17:02:04<2:02:56, 16.39s/it]


訓練次數4550，總回報430.9996563573857


 91%|███████████████████████████████   | 4560/5000 [17:04:05<1:32:14, 12.58s/it]


訓練次數4560，總回報412.5794520547931


 91%|███████████████████████████████   | 4570/5000 [17:06:18<2:00:14, 16.78s/it]


訓練次數4570，總回報883.9888086642557


 92%|███████████████████████████████▏  | 4580/5000 [17:08:32<1:52:56, 16.13s/it]


訓練次數4580，總回報873.8179487179422


 92%|███████████████████████████████▏  | 4590/5000 [17:10:36<1:40:55, 14.77s/it]


訓練次數4590，總回報549.3268456375805


 92%|███████████████████████████████▎  | 4600/5000 [17:12:19<1:26:04, 12.91s/it]


訓練次數4600，總回報579.3424028268516


 92%|███████████████████████████████▎  | 4610/5000 [17:14:17<1:23:39, 12.87s/it]


訓練次數4610，總回報274.59655172413807


 92%|███████████████████████████████▍  | 4620/5000 [17:16:53<2:07:58, 20.21s/it]


訓練次數4620，總回報861.5714285714172


 93%|███████████████████████████████▍  | 4630/5000 [17:18:47<1:18:01, 12.65s/it]


訓練次數4630，總回報706.1207792207688


 93%|███████████████████████████████▌  | 4640/5000 [17:21:10<1:37:12, 16.20s/it]


訓練次數4640，總回報924.8032786885121


 93%|███████████████████████████████▌  | 4650/5000 [17:23:38<1:29:26, 15.33s/it]


訓練次數4650，總回報387.4821917808208


 93%|███████████████████████████████▋  | 4660/5000 [17:25:26<1:01:41, 10.89s/it]


訓練次數4660，總回報324.93986013986023


 93%|███████████████████████████████▊  | 4670/5000 [17:28:04<2:02:33, 22.28s/it]


訓練次數4670，總回報907.9034482758482


 94%|███████████████████████████████▊  | 4680/5000 [17:31:13<1:55:01, 21.57s/it]


訓練次數4680，總回報889.2320610686961


 94%|███████████████████████████████▉  | 4690/5000 [17:33:59<1:37:20, 18.84s/it]


訓練次數4690，總回報914.5093632958723


 94%|███████████████████████████████▉  | 4700/5000 [17:36:37<1:30:53, 18.18s/it]


訓練次數4700，總回報581.6306397306333


 94%|████████████████████████████████  | 4710/5000 [17:39:24<1:43:50, 21.48s/it]


訓練次數4710，總回報750.9753246753138


 94%|████████████████████████████████  | 4720/5000 [17:42:07<1:26:27, 18.53s/it]


訓練次數4720，總回報914.1315412186276


 95%|████████████████████████████████▏ | 4730/5000 [17:44:36<1:13:20, 16.30s/it]


訓練次數4730，總回報112.69211356466903


 95%|████████████████████████████████▏ | 4740/5000 [17:47:18<1:31:14, 21.06s/it]


訓練次數4740，總回報430.72664907651455


 95%|████████████████████████████████▎ | 4750/5000 [17:49:39<1:05:28, 15.71s/it]


訓練次數4750，總回報915.9779922779786


 95%|████████████████████████████████▎ | 4760/5000 [17:52:06<1:08:55, 17.23s/it]


訓練次數4760，總回報394.1990353697729


 95%|████████████████████████████████▍ | 4770/5000 [17:55:47<1:48:53, 28.41s/it]


訓練次數4770，總回報903.790851735006


 96%|██████████████████████████████████▍ | 4780/5000 [17:58:15<55:57, 15.26s/it]


訓練次數4780，總回報235.02727272727418


 96%|██████████████████████████████████▍ | 4790/5000 [18:00:10<51:35, 14.74s/it]


訓練次數4790，總回報736.381818181805


 96%|██████████████████████████████████▌ | 4800/5000 [18:02:36<53:41, 16.11s/it]


訓練次數4800，總回報467.46031746031514


 96%|██████████████████████████████████▋ | 4810/5000 [18:04:48<59:39, 18.84s/it]


訓練次數4810，總回報888.0544061302643


 96%|██████████████████████████████████▋ | 4820/5000 [18:07:02<43:29, 14.50s/it]


訓練次數4820，總回報490.120547945202


 97%|████████████████████████████████▊ | 4830/5000 [18:09:50<1:01:42, 21.78s/it]


訓練次數4830，總回報524.181226053636


 97%|██████████████████████████████████▊ | 4840/5000 [18:12:36<44:42, 16.77s/it]


訓練次數4840，總回報888.5703071672208


 97%|██████████████████████████████████▉ | 4850/5000 [18:14:44<33:44, 13.49s/it]


訓練次數4850，總回報52.82978723404247


 97%|██████████████████████████████████▉ | 4860/5000 [18:17:28<41:42, 17.87s/it]


訓練次數4860，總回報685.4660130718866


 97%|███████████████████████████████████ | 4870/5000 [18:19:56<34:52, 16.10s/it]


訓練次數4870，總回報532.1815331010414


 98%|███████████████████████████████████▏| 4880/5000 [18:21:45<23:49, 11.92s/it]


訓練次數4880，總回報354.5170212765954


 98%|███████████████████████████████████▏| 4890/5000 [18:24:06<34:25, 18.78s/it]


訓練次數4890，總回報829.5627986347978


 98%|███████████████████████████████████▎| 4900/5000 [18:26:26<31:53, 19.13s/it]


訓練次數4900，總回報911.8664122137334


 98%|███████████████████████████████████▎| 4910/5000 [18:29:06<28:35, 19.06s/it]


訓練次數4910，總回報207.4013698630143


 98%|███████████████████████████████████▍| 4920/5000 [18:31:24<17:49, 13.37s/it]


訓練次數4920，總回報676.9648648648568


 99%|███████████████████████████████████▍| 4930/5000 [18:33:45<19:22, 16.61s/it]


訓練次數4930，總回報528.0895752895703


 99%|███████████████████████████████████▌| 4940/5000 [18:35:39<13:36, 13.60s/it]


訓練次數4940，總回報508.16506024096054


 99%|███████████████████████████████████▋| 4950/5000 [18:37:16<07:09,  8.58s/it]


訓練次數4950，總回報107.7000000000003


 99%|███████████████████████████████████▋| 4960/5000 [18:39:41<10:53, 16.33s/it]


訓練次數4960，總回報378.09970326409314


 99%|███████████████████████████████████▊| 4970/5000 [18:41:47<07:06, 14.22s/it]


訓練次數4970，總回報481.0720279720267


100%|███████████████████████████████████▊| 4980/5000 [18:44:09<04:43, 14.18s/it]


訓練次數4980，總回報584.4354838709648


100%|███████████████████████████████████▉| 4990/5000 [18:46:21<02:17, 13.75s/it]


訓練次數4990，總回報638.5646302250739


100%|████████████████████████████████████| 5000/5000 [18:48:46<00:00, 13.55s/it]


訓練次數5000，總回報459.55082508250644





In [None]:
# Invoke the Record method on the trained Agent object.
# NOTE(review): Record is defined elsewhere in this notebook; it presumably
# plays one evaluation episode (possibly saving an animation) and the number
# shown under this cell is presumably that episode's total return -- confirm
# against the method's definition.
Agent.Record()

481.5219178082169


In [None]:
# Reference snippet: two alternative schedules for the exploration rate
# self.EPS. Both lines assign the same attribute, so if both are executed the
# second one overwrites the first -- presumably only one is meant to be kept
# inside the agent's training loop (TODO confirm). `i` is presumably the
# current episode index and N_EPISODES the total number of episodes.
#
# Linear decay strategy:
#   EPS equals 1 at i == 0 and decreases linearly with i, reaching
#   self.eps_low at i == x*N_EPISODES/10; max() then holds it at self.eps_low
#   for every later i. x therefore sets the decay length in tenths of
#   N_EPISODES. x must be non-zero (it is a divisor); a positive value is
#   presumably intended.
self.EPS=max( 1-i*(1-self.eps_low)/(x*N_EPISODES/10) , self.eps_low) # x is a real number
# Exponential decay strategy:
#   EPS equals 1 at i == 0 and, for y > 0, decays exponentially toward
#   self.eps_low without ever dropping below it; at i == N_EPISODES the
#   remaining gap above eps_low is (1-eps_low)*exp(-y). Larger y decays faster.
self.EPS=self.eps_low+(1-self.eps_low)*math.exp(-i*y/(N_EPISODES)) # y is a real number