# 前期准备

## 创建环境

In [1]:
import gymnasium as gym

# 创建Lunar Lander环境，这里使用离散动作空间的版本
env = gym.make("LunarLander-v2")


## 探索环境
了解 动作空间, 观察空间

后续 代理设计 需要

In [3]:
print("Action Space:", env.action_space)
print("Observation Space:", env.observation_space)

Action Space: Discrete(4)
Observation Space: Box([-1.5       -1.5       -5.        -5.        -3.1415927 -5.
 -0.        -0.       ], [1.5       1.5       5.        5.        3.1415927 5.        1.
 1.       ], (8,), float32)


## 示例代码

随机动作

In [158]:
import gymnasium as gym
env = gym.make("LunarLander-v2", render_mode="human")
observation, info = env.reset()



for _ in range(1000):
    action = env.action_space.sample()  # agent policy that uses the observation and info
    cumulative_reward = 0
    
    observation, reward, terminated, truncated, info = env.step(action)
    cumulative_reward += reward
    
    # print("{}, {}, {}, {}".format(observation, reward, terminated, truncated))

    if terminated or truncated:
        print("Episode finished after {} timesteps".format(_+1), "Cumulative Reward: ", cumulative_reward)
        observation, info = env.reset()

env.close()

Episode finished after 77 timesteps Cumulative Reward:  -100
Episode finished after 149 timesteps Cumulative Reward:  -100
Episode finished after 252 timesteps Cumulative Reward:  -100
Episode finished after 343 timesteps Cumulative Reward:  -100
Episode finished after 438 timesteps Cumulative Reward:  -100
Episode finished after 530 timesteps Cumulative Reward:  -100
Episode finished after 618 timesteps Cumulative Reward:  -100
Episode finished after 745 timesteps Cumulative Reward:  -100
Episode finished after 825 timesteps Cumulative Reward:  -100
Episode finished after 960 timesteps Cumulative Reward:  -100


## 显卡测试

pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

pip install torch==2.2.2+cu118 torchvision==0.14.2+cu118 torchaudio==0.12.2+cu118 -f https://download.pytorch.org/whl/torch_stable.html


In [4]:
import torch

print("PyTorch version: ", torch.__version__)
print("CUDA version: ", torch.version.cuda)

# 输出是否可以使用 CUDA
print("CUDA available: ", torch.cuda.is_available())


PyTorch version:  2.2.2+cu118
CUDA version:  11.8
CUDA available:  True


# 正式工作 (使用DQN)

训练一个Agent, 根据Observation和info, 决定最佳Action

## 01 导入并初始化环境

In [203]:
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from collections import deque
import matplotlib.pyplot as plt

env = gym.make("LunarLander-v2")


## 02 定义DQN网络模型

通过PyTorch的nn模块, 来构建神经网络

In [206]:
import torch
import torch.nn as nn
import torch.optim as optim

class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        "state_size: 状态空间的维度"
        "action_size: 动作空间的维度"
        super(DQN, self).__init__()
        # 定义神经网络的结构
        # 输入层的维度是状态空间的维度, 输出层的维度是动作空间的维度
        # 输入层 -(fc1)> 128 -(fc2)> 64 -(fc3)> 输出层 (常见的三层全连接神经网络)
        # fc1, fc2, fc3 分别是三个全连接层, 用于处理输入数据
        self.fc1 = nn.Linear(state_size, 128) 
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, action_size)
        self.relu = nn.ReLU() # relu 是激活函数, 用于增加网络的非线性 (可以在每个全连接层后面添加, 是一个超参数)

    # 前向传播: 计算出网络的输出和损失, 用来更新网络
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x) # 返回一个向量, 其维度 = 动作空间的维度. i.e., 每个动作的Q值, 即给定状态下动作的价值(分数 / 预期回报)


In [188]:
import torch
import torch.nn as nn
import torch.optim as optim

class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        # 增加更多的隐藏层和神经元
        self.fc1 = nn.Linear(state_size, 256)  # 第一个隐藏层, 增加到256个神经元
        self.fc2 = nn.Linear(256, 128)  # 第二个隐藏层, 128个神经元
        self.fc3 = nn.Linear(128, 128)  # 第三个隐藏层, 也是128个神经元
        self.fc4 = nn.Linear(128, 64)   # 第四个隐藏层, 64个神经元
        self.fc5 = nn.Linear(64, action_size)  # 输出层
        self.relu = nn.ReLU()  # 使用ReLU激活函数

    def forward(self, x):
        # 应用ReLU激活函数到每一个隐藏层
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)  # 输出层不用激活函数，直接返回动作的Q值
        return x


## 03 实现DQN Agent

创建一个Agent类, 用来实现DQN的训练 (包括经验回放)

In [240]:
import random
import numpy as np
from collections import deque

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # 确定设备
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.99
        self.epsilon = 1
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.990
        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters())

    # 定义 Agent如何根据State选择Action
    def act(self, state):
        # state = torch.from_numpy(state).float().unsqueeze(0).to(self.device) # 将状态转换为张量 (还要添加一个维度), 以便输入到网络中
        
        # 使用 ε-greedy 策略选择动作
        if np.random.rand() <= self.epsilon: # 如果随机数小于 ε, 则随机选择一个动作, 用于探索
            return random.randrange(self.action_size)
        
        # 得到Q值, 用于选择动作
        self.model.eval() # 将模型设置为评估模式, 这样可以避免在评估模型时进行梯度更新
        with torch.no_grad(): # 不需要计算梯度, 因为我们只是在评估模型
            action_values = self.model(state) # 用当前状态获取每个动作的Q值
            
        self.model.train() # 修改回训练模式, 以便在训练模型时进行梯度更新 (模型的参数可以继续更新)
        return np.argmax(action_values.cpu().data.numpy()) # 根据Q值选择最佳动作
    
    # 用于 经验回放 (Experience Replay)
    # 当Agent在Environment中执行Action 并观察到新的状态和奖励时, 将这些信息存储, 之后用于训练网络模型
    def store(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    # 经验回放 (Experience Replay)
    # 打破数据之间的相关性, 提高训练的稳定性
    def replay(self, batch_size):
        if len(self.memory) < batch_size: # 如果记忆库中的样本数量小于批量大小, 则不执行
            return
        
        minibatch = random.sample(self.memory, batch_size) # 从记忆库中随机选择一个批量的经验
        states, actions, rewards, next_states, dones = zip(*minibatch) # 将批量经验拆分为状态, 动作, 奖励, 下一个状态, 完成标志
        # 将拆分的经验转换为张量, 以便输入到网络中
        states = torch.tensor(states, dtype =torch.float32).to(self.device).squeeze(1)
        actions = torch.tensor(actions, dtype=torch.long).to(self.device).unsqueeze(1)
        rewards = torch.tensor(rewards, dtype=torch.float32).to(self.device).unsqueeze(1)
        next_states = torch.tensor(next_states, dtype=torch.float32).to(self.device).squeeze(1)
        dones = torch.tensor(dones, dtype=torch.bool).to(self.device).unsqueeze(1)

        # 打印所有张量的形状
        # print("states: ", states.shape)
        # print("actions: ", actions.shape)
        # print("rewards: ", rewards)
        # print("next_states: ", next_states.shape)
        # print("dones: ", dones.shape)

        Q_targets_next = self.model(next_states).detach().max(1)[0].unsqueeze(1) # 使用目标网络计算下一个状态的Q值, 用于计算目标Q值
        Q_targets = rewards + (self.gamma * Q_targets_next * (~dones)) # 计算目标Q值, 用于更新当前状态的Q值
        Q_expected = self.model(states).gather(1, actions) # 计算预期Q值, 用于计算损失

        loss = nn.MSELoss()(Q_expected, Q_targets) # 计算均方误差损失
        self.optimizer.zero_grad() # 梯度清零, 以便在每次迭代中重新计算梯度
        loss.backward() # 反向传播, 计算梯度
        self.optimizer.step() # 更新网络参数

        if self.epsilon > self.epsilon_min: # 更新 ε, 以便在训练过程中逐渐减小探索的概率, 以便在初期更多地探索, 在后期更多地利用经验
            self.epsilon *= self.epsilon_decay


## 04 训练Agent

In [241]:
import os

batch_size = 8

env = gym.make("LunarLander-v2")

current_dir = os.getcwd()
model_dir = os.path.join(current_dir, "models")
os.makedirs(model_dir, exist_ok=True) # 创建模型保存目录


agent = DQNAgent(env.observation_space.shape[0], env.action_space.n)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
agent.model.to(device)  # 移动模型到GPU

for e in range(5000): # 训练1000个episode
    full_state = env.reset()
    state = full_state[0] # 提取向量
    # print("初始状态", state)
    state = torch.from_numpy(np.reshape(state, [1, -1])).float().to(device) # 把state转换为 网络模型 接受的形状
    
    cumulative_reward = 0  # 初始化累积奖励为0
    
    for time in range(1000): # 一个episode最多执行500个时间步
        action = agent.act(state)
        next_state, reward, done, truncated, info = env.step(action)  # 执行动作, 获取下一个状态, 奖励, 完成标志, 和
        cumulative_reward += reward # 累积奖励
        
        # print("下一个状态", next_state)
        next_state = torch.from_numpy(np.reshape(next_state, [1, -1])).float().to(device) # 把next_state转换为 网络模型 接受的形状
        reward = torch.tensor([reward], device=device) # 把reward转换为张量
        done = torch.tensor([done], device=device)
        
        # agent.store(state, action, reward, next_state, done) # 存储经验
        agent.store(state.cpu().numpy(), action, reward.cpu().numpy(), next_state.cpu().numpy(), done.cpu().numpy()) # 存储经验
        state = next_state # 更新状态
        
        if done:
            print("Episode: {}/{}, score: {}, cumulative reward: {}".format(e, 1000, time, cumulative_reward)) # 打印每个episode的时间步数和累积奖励
            break
        if len(agent.memory) > batch_size: # 当记忆库中的样本数量大于32时, 开始经验回放
            agent.replay(batch_size)
    
    # 每100个episode结束后, 保存模型的状态字典
    if e % 100 == 0:
        model_path = os.path.join(model_dir, f"model_0.pth")  # 使用 f-string 包含 episode 数
        torch.save(agent.model.state_dict(), model_path)
        print("模型状态字典已保存至", model_path)

    


Using device: cuda


  dones = torch.tensor(dones, dtype=torch.bool).to(self.device).unsqueeze(1)


Episode: 0/1000, score: 89, cumulative reward: -93.46879846965741
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1/1000, score: 124, cumulative reward: -65.00613499451114
Episode: 2/1000, score: 146, cumulative reward: -469.51447154491416
Episode: 3/1000, score: 75, cumulative reward: -271.57072413578373
Episode: 4/1000, score: 82, cumulative reward: -236.36752059382155
Episode: 5/1000, score: 57, cumulative reward: -64.60773001129647
Episode: 6/1000, score: 124, cumulative reward: -324.80365263736326
Episode: 7/1000, score: 85, cumulative reward: -488.28733760585453
Episode: 8/1000, score: 68, cumulative reward: -467.29359942396053
Episode: 9/1000, score: 109, cumulative reward: -197.121425065596
Episode: 10/1000, score: 78, cumulative reward: -113.63788662459672
Episode: 11/1000, score: 79, cumulative reward: -59.590175959696985
Episode: 12/1000, score: 81, cumulative reward: -173.5039994405879
Episode: 13/1000, score: 159, cumulative re

# Test

In [247]:
import gymnasium as gym
import numpy as np

import torch# 确保从包含DQN类定义的文件中导入
import os

# 假设状态空间和动作空间的维度已知
state_size = 8  # 根据你的环境设置
action_size = 4  # 根据你的环境设置

current_dir = os.getcwd()
model_dir = os.path.join(current_dir, "models")
model_path = os.path.join(model_dir, "model_0.pth")

model = DQN(state_size, action_size)
model.load_state_dict(torch.load(model_path))
model.eval()  # 将模型设置为评估模式

env = gym.make("LunarLander-v2", render_mode="human")
observation, info = env.reset()



for _ in range(1000):
    # 将观测转换为适合模型的格式
    state = torch.tensor([observation], dtype=torch.float32)
    
    cumulative_reward = 0  # 初始化累积奖励为0
    
    with torch.no_grad():  # 禁止torch追踪此处的梯度计算，因为我们在推理而不是训练
        action = model(state).max(1)[1].item()  # 获取最大Q值对应的动作

    observation, reward, terminated, truncated, info = env.step(action)
    # print("reward: ", reward)
    cumulative_reward += reward # 累积奖励
    

    if terminated or truncated:
        print("Episode finished, reward: ", cumulative_reward)
        observation, info = env.reset()

env.close()


Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100
Episode finished, reward:  -100


# Analysis


In [249]:
import matplotlib.pyplot as plt

# 假设你的数据存储在一个多行的字符串中，每行包含episode的索引和对应的cumulative reward
data = """
Episode: 16/1000, score: 141, cumulative reward: -1.0854995087740065
Episode: 17/1000, score: 131, cumulative reward: -282.7268036360707
Episode: 18/1000, score: 83, cumulative reward: -114.39879547471764
Episode: 19/1000, score: 62, cumulative reward: -100.58534007483678
Episode: 20/1000, score: 99, cumulative reward: -175.7076604182946
Episode: 21/1000, score: 79, cumulative reward: -367.88733668931724
Episode: 22/1000, score: 141, cumulative reward: -108.62529974567241
Episode: 23/1000, score: 87, cumulative reward: -140.46560012319634
Episode: 24/1000, score: 91, cumulative reward: -36.09876202488024
Episode: 25/1000, score: 106, cumulative reward: -225.90471141568986
Episode: 26/1000, score: 98, cumulative reward: -410.3692796563164
Episode: 27/1000, score: 131, cumulative reward: -91.09574698394412
Episode: 28/1000, score: 96, cumulative reward: -132.16539380861593
Episode: 29/1000, score: 155, cumulative reward: -421.6953556061639
Episode: 30/1000, score: 142, cumulative reward: -180.4933134454235
Episode: 31/1000, score: 202, cumulative reward: -54.79963124598408
Episode: 32/1000, score: 225, cumulative reward: -126.56827732373542
Episode: 33/1000, score: 150, cumulative reward: -192.83394480585423
Episode: 34/1000, score: 148, cumulative reward: -422.77951447643534
Episode: 35/1000, score: 81, cumulative reward: -275.447377357453
Episode: 36/1000, score: 98, cumulative reward: -425.49348959402266
Episode: 37/1000, score: 108, cumulative reward: -257.8304125686582
Episode: 38/1000, score: 231, cumulative reward: -13.010713328172884
Episode: 39/1000, score: 126, cumulative reward: -353.81515380657225
Episode: 40/1000, score: 96, cumulative reward: -266.9062707032228
Episode: 41/1000, score: 122, cumulative reward: -102.77137385327522
Episode: 42/1000, score: 136, cumulative reward: -223.48070367803783
Episode: 43/1000, score: 83, cumulative reward: -230.04648747477407
Episode: 44/1000, score: 137, cumulative reward: -594.4501157253133
Episode: 45/1000, score: 61, cumulative reward: -172.64460315716445
Episode: 46/1000, score: 111, cumulative reward: -508.42121211878316
Episode: 47/1000, score: 116, cumulative reward: -513.3058946759363
Episode: 48/1000, score: 81, cumulative reward: -253.15504693349587
Episode: 49/1000, score: 112, cumulative reward: -147.48267630728176
Episode: 50/1000, score: 71, cumulative reward: -215.98962047708264
Episode: 51/1000, score: 128, cumulative reward: -285.3202266237786
Episode: 52/1000, score: 137, cumulative reward: -599.3491622935096
Episode: 53/1000, score: 82, cumulative reward: -188.9600801475994
Episode: 54/1000, score: 106, cumulative reward: -75.14806256234766
Episode: 55/1000, score: 80, cumulative reward: -184.82127012936547
Episode: 56/1000, score: 54, cumulative reward: -136.0383214030735
Episode: 57/1000, score: 91, cumulative reward: -267.0623364773543
Episode: 58/1000, score: 151, cumulative reward: -268.7709265885819
Episode: 59/1000, score: 115, cumulative reward: -140.22372394050393
Episode: 60/1000, score: 79, cumulative reward: -133.3512015871027
Episode: 61/1000, score: 164, cumulative reward: -115.14215499144835
Episode: 62/1000, score: 82, cumulative reward: -159.7977424871561
Episode: 63/1000, score: 143, cumulative reward: -129.2094083665849
Episode: 64/1000, score: 104, cumulative reward: -60.67448130053571
Episode: 65/1000, score: 112, cumulative reward: -320.37184241531725
Episode: 66/1000, score: 95, cumulative reward: -230.0995213381801
Episode: 67/1000, score: 86, cumulative reward: -176.88019021445615
Episode: 68/1000, score: 100, cumulative reward: -208.73723149125405
Episode: 69/1000, score: 71, cumulative reward: -116.60952820275888
Episode: 70/1000, score: 116, cumulative reward: -295.29452874972503
Episode: 71/1000, score: 65, cumulative reward: -290.2592538209698
Episode: 72/1000, score: 169, cumulative reward: -187.19024135135757
Episode: 73/1000, score: 66, cumulative reward: -39.22509827269717
Episode: 74/1000, score: 101, cumulative reward: -141.59424544565306
Episode: 75/1000, score: 75, cumulative reward: -125.21452271422007
Episode: 76/1000, score: 82, cumulative reward: -230.87054696373434
Episode: 77/1000, score: 84, cumulative reward: -169.8630343977139
Episode: 78/1000, score: 114, cumulative reward: -240.28937131332216
Episode: 79/1000, score: 88, cumulative reward: -212.9952266848971
Episode: 80/1000, score: 85, cumulative reward: -267.4633039120638
Episode: 81/1000, score: 133, cumulative reward: -38.296633578206226
Episode: 82/1000, score: 72, cumulative reward: -135.4841510795642
Episode: 83/1000, score: 108, cumulative reward: -117.4326841614187
Episode: 84/1000, score: 74, cumulative reward: -155.82147235081726
Episode: 85/1000, score: 103, cumulative reward: 4.255928757316923
Episode: 86/1000, score: 74, cumulative reward: -110.90254436452234
Episode: 87/1000, score: 109, cumulative reward: -236.53068937791645
Episode: 88/1000, score: 87, cumulative reward: -247.30132654498064
Episode: 89/1000, score: 95, cumulative reward: -140.86943141477582
Episode: 90/1000, score: 99, cumulative reward: -156.53534032692238
Episode: 91/1000, score: 99, cumulative reward: -197.07325160547987
Episode: 92/1000, score: 76, cumulative reward: -210.87877705662225
Episode: 93/1000, score: 82, cumulative reward: -287.0778406266878
Episode: 94/1000, score: 56, cumulative reward: -191.86906230590185
Episode: 95/1000, score: 115, cumulative reward: -246.6723261497472
Episode: 96/1000, score: 112, cumulative reward: -171.2834850087288
Episode: 97/1000, score: 90, cumulative reward: -118.22413038200605
Episode: 98/1000, score: 81, cumulative reward: -288.6051859790314
Episode: 99/1000, score: 112, cumulative reward: -58.24004665396774
Episode: 100/1000, score: 114, cumulative reward: -220.14453103631408
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 101/1000, score: 108, cumulative reward: -283.3840850837994
Episode: 102/1000, score: 77, cumulative reward: -306.5016177291286
Episode: 103/1000, score: 74, cumulative reward: -205.26011492307455
Episode: 104/1000, score: 103, cumulative reward: -345.67349698721216
Episode: 105/1000, score: 70, cumulative reward: -190.62952119181438
Episode: 106/1000, score: 138, cumulative reward: -10.549447806543157
Episode: 107/1000, score: 112, cumulative reward: -109.86651820259456
Episode: 108/1000, score: 126, cumulative reward: -367.60956832692244
Episode: 109/1000, score: 93, cumulative reward: -420.8019704935747
Episode: 110/1000, score: 210, cumulative reward: -46.64319780060221
Episode: 111/1000, score: 165, cumulative reward: -70.91012262723137
Episode: 112/1000, score: 105, cumulative reward: -5.601510127199944
Episode: 113/1000, score: 230, cumulative reward: -314.4754382112916
Episode: 114/1000, score: 159, cumulative reward: 20.025791746230425
Episode: 115/1000, score: 160, cumulative reward: -246.11229310164734
Episode: 116/1000, score: 103, cumulative reward: -257.8374367168309
Episode: 117/1000, score: 124, cumulative reward: -172.95327398179523
Episode: 118/1000, score: 80, cumulative reward: -255.6088236049141
Episode: 119/1000, score: 114, cumulative reward: -218.71481042388788
Episode: 120/1000, score: 124, cumulative reward: -467.7251498402534
Episode: 121/1000, score: 86, cumulative reward: -271.0281432514105
Episode: 122/1000, score: 141, cumulative reward: -202.25113960976438
Episode: 123/1000, score: 104, cumulative reward: -206.1827272097577
Episode: 124/1000, score: 112, cumulative reward: -343.3319833780537
Episode: 125/1000, score: 92, cumulative reward: -354.369382010629
Episode: 126/1000, score: 89, cumulative reward: -149.69945201900654
Episode: 127/1000, score: 198, cumulative reward: -320.2950296209608
Episode: 128/1000, score: 99, cumulative reward: -268.55461791288707
Episode: 129/1000, score: 187, cumulative reward: -481.4853370005852
Episode: 130/1000, score: 112, cumulative reward: -136.41178452924188
Episode: 131/1000, score: 77, cumulative reward: -230.80725225500802
Episode: 132/1000, score: 165, cumulative reward: -396.0584152462951
Episode: 133/1000, score: 112, cumulative reward: -239.75300167907616
Episode: 134/1000, score: 147, cumulative reward: -276.4798022717747
Episode: 135/1000, score: 101, cumulative reward: -280.874448419572
Episode: 136/1000, score: 105, cumulative reward: -99.05589796110723
Episode: 137/1000, score: 83, cumulative reward: -189.31776074409038
Episode: 138/1000, score: 177, cumulative reward: -191.1038481170395
Episode: 139/1000, score: 100, cumulative reward: -56.46259784798983
Episode: 140/1000, score: 77, cumulative reward: -125.92273414769491
Episode: 141/1000, score: 92, cumulative reward: -259.6989449357572
Episode: 142/1000, score: 123, cumulative reward: -70.72564716432312
Episode: 143/1000, score: 108, cumulative reward: -395.38992658246116
Episode: 144/1000, score: 123, cumulative reward: -247.20323421434685
Episode: 145/1000, score: 100, cumulative reward: -138.12176614202335
Episode: 146/1000, score: 121, cumulative reward: -141.31020649010998
Episode: 147/1000, score: 150, cumulative reward: -317.11843608501033
Episode: 148/1000, score: 94, cumulative reward: -283.34459565278917
Episode: 149/1000, score: 71, cumulative reward: -118.79082290755801
Episode: 150/1000, score: 130, cumulative reward: -133.56421421469418
Episode: 151/1000, score: 165, cumulative reward: -139.66413024388237
Episode: 152/1000, score: 85, cumulative reward: -220.14090253485065
Episode: 153/1000, score: 134, cumulative reward: -190.32945401140262
Episode: 154/1000, score: 148, cumulative reward: -33.4342642540644
Episode: 155/1000, score: 110, cumulative reward: -92.66644096605582
Episode: 156/1000, score: 128, cumulative reward: -396.03763797130256
Episode: 157/1000, score: 78, cumulative reward: -167.72425852544944
Episode: 158/1000, score: 126, cumulative reward: -294.0765533979427
Episode: 159/1000, score: 195, cumulative reward: -229.95470465844693
Episode: 160/1000, score: 128, cumulative reward: -288.02549358075
Episode: 161/1000, score: 167, cumulative reward: -311.7540077193931
Episode: 162/1000, score: 219, cumulative reward: -105.19645605256117
Episode: 163/1000, score: 136, cumulative reward: -225.53021812634296
Episode: 164/1000, score: 209, cumulative reward: -110.20413674297058
Episode: 165/1000, score: 94, cumulative reward: -414.3668977766969
Episode: 166/1000, score: 152, cumulative reward: -26.40219651776016
Episode: 167/1000, score: 133, cumulative reward: -88.865305921007
Episode: 168/1000, score: 100, cumulative reward: -125.62511082402628
Episode: 169/1000, score: 205, cumulative reward: -187.19401607538566
Episode: 170/1000, score: 108, cumulative reward: -185.35728722732694
Episode: 171/1000, score: 94, cumulative reward: -169.98883826895053
Episode: 172/1000, score: 173, cumulative reward: -346.87293158173753
Episode: 173/1000, score: 50, cumulative reward: -106.60256208624067
Episode: 174/1000, score: 74, cumulative reward: -441.7066566358749
Episode: 175/1000, score: 79, cumulative reward: -118.23076547961391
Episode: 176/1000, score: 289, cumulative reward: -53.23621578456428
Episode: 177/1000, score: 83, cumulative reward: -170.1318601923031
Episode: 178/1000, score: 186, cumulative reward: -152.43520337264718
Episode: 179/1000, score: 124, cumulative reward: -136.60074496616426
Episode: 180/1000, score: 125, cumulative reward: -303.84896765275715
Episode: 181/1000, score: 155, cumulative reward: -333.4521353704233
Episode: 182/1000, score: 145, cumulative reward: -101.06843448008763
Episode: 183/1000, score: 124, cumulative reward: 6.428323364419811
Episode: 184/1000, score: 95, cumulative reward: -279.29582105831935
Episode: 185/1000, score: 196, cumulative reward: -195.52409673703602
Episode: 186/1000, score: 102, cumulative reward: -74.51651307263471
Episode: 187/1000, score: 154, cumulative reward: -395.18980614035377
Episode: 188/1000, score: 128, cumulative reward: -74.61148735439508
Episode: 189/1000, score: 101, cumulative reward: -179.99648988082464
Episode: 190/1000, score: 170, cumulative reward: -66.42133055921911
Episode: 191/1000, score: 137, cumulative reward: -313.18360409036154
Episode: 192/1000, score: 72, cumulative reward: -375.5155881586129
Episode: 193/1000, score: 88, cumulative reward: -301.4264211204387
Episode: 194/1000, score: 174, cumulative reward: -35.57487624814868
Episode: 195/1000, score: 97, cumulative reward: -45.00781186090369
Episode: 196/1000, score: 143, cumulative reward: -448.6338931910624
Episode: 197/1000, score: 115, cumulative reward: -345.5715772267311
Episode: 198/1000, score: 106, cumulative reward: -67.10582366914505
Episode: 199/1000, score: 130, cumulative reward: -149.34068562679374
Episode: 200/1000, score: 92, cumulative reward: -56.84748176557286
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 201/1000, score: 77, cumulative reward: -289.33691848720343
Episode: 202/1000, score: 124, cumulative reward: -154.10419581365116
Episode: 203/1000, score: 157, cumulative reward: -211.9560001149436
Episode: 204/1000, score: 179, cumulative reward: -154.53886423541462
Episode: 205/1000, score: 90, cumulative reward: -30.383968181364978
Episode: 206/1000, score: 93, cumulative reward: -217.42629972945485
Episode: 207/1000, score: 119, cumulative reward: -21.76751378864728
Episode: 208/1000, score: 74, cumulative reward: -130.38422712530274
Episode: 209/1000, score: 100, cumulative reward: -68.28600406686829
Episode: 210/1000, score: 207, cumulative reward: -310.0088746160267
Episode: 211/1000, score: 127, cumulative reward: -55.01741904736439
Episode: 212/1000, score: 115, cumulative reward: -212.43770737320526
Episode: 213/1000, score: 85, cumulative reward: -282.4263373912921
Episode: 214/1000, score: 272, cumulative reward: -412.56384545952056
Episode: 215/1000, score: 152, cumulative reward: -319.201173077803
Episode: 216/1000, score: 509, cumulative reward: -187.70796516063677
Episode: 217/1000, score: 64, cumulative reward: -178.90460752390018
Episode: 218/1000, score: 135, cumulative reward: 18.33723440709791
Episode: 219/1000, score: 111, cumulative reward: -305.4863849376975
Episode: 220/1000, score: 125, cumulative reward: -120.19046065976326
Episode: 221/1000, score: 223, cumulative reward: -269.4198952679484
Episode: 222/1000, score: 117, cumulative reward: -257.8479514299217
Episode: 223/1000, score: 572, cumulative reward: -190.03124152705692
Episode: 224/1000, score: 110, cumulative reward: -271.94427653759624
Episode: 225/1000, score: 204, cumulative reward: -389.19623840611496
Episode: 226/1000, score: 97, cumulative reward: -363.5125120949811
Episode: 227/1000, score: 102, cumulative reward: -403.97824950262276
Episode: 228/1000, score: 81, cumulative reward: -412.91527000620414
Episode: 229/1000, score: 94, cumulative reward: -338.2188124858851
Episode: 230/1000, score: 234, cumulative reward: -437.28513932927956
Episode: 231/1000, score: 234, cumulative reward: 2.2595232987638667
Episode: 232/1000, score: 111, cumulative reward: -269.3557468631352
Episode: 233/1000, score: 217, cumulative reward: -181.18364717287275
Episode: 234/1000, score: 135, cumulative reward: -73.8162421441324
Episode: 235/1000, score: 130, cumulative reward: -104.8207438868624
Episode: 236/1000, score: 227, cumulative reward: -207.14001600085817
Episode: 237/1000, score: 254, cumulative reward: -118.30667580381657
Episode: 238/1000, score: 110, cumulative reward: -196.64946520932244
Episode: 239/1000, score: 358, cumulative reward: -262.15439156877903
Episode: 240/1000, score: 153, cumulative reward: -159.63988539444074
Episode: 241/1000, score: 296, cumulative reward: -231.8404637069238
Episode: 242/1000, score: 165, cumulative reward: -136.38831538246913
Episode: 243/1000, score: 158, cumulative reward: -354.4624043883613
Episode: 244/1000, score: 129, cumulative reward: -151.37010193611357
Episode: 245/1000, score: 106, cumulative reward: -196.13854407430716
Episode: 246/1000, score: 172, cumulative reward: -124.90943624647691
Episode: 247/1000, score: 120, cumulative reward: 5.9270648704640365
Episode: 248/1000, score: 138, cumulative reward: -45.60445821058386
Episode: 249/1000, score: 146, cumulative reward: -261.92967774854657
Episode: 250/1000, score: 205, cumulative reward: -180.4809004854636
Episode: 251/1000, score: 93, cumulative reward: -139.74967860510887
Episode: 252/1000, score: 155, cumulative reward: -50.958027692887775
Episode: 253/1000, score: 83, cumulative reward: -181.99856609553837
Episode: 254/1000, score: 949, cumulative reward: -221.43676804463
Episode: 255/1000, score: 106, cumulative reward: -171.46831505926042
Episode: 256/1000, score: 61, cumulative reward: -97.77660044187277
Episode: 257/1000, score: 517, cumulative reward: -148.98393993601348
Episode: 258/1000, score: 226, cumulative reward: -115.52111986004807
Episode: 259/1000, score: 115, cumulative reward: -21.132889299683413
Episode: 260/1000, score: 752, cumulative reward: -160.72734188529768
Episode: 261/1000, score: 174, cumulative reward: -147.5267345077631
Episode: 262/1000, score: 635, cumulative reward: -237.13562887483744
Episode: 263/1000, score: 197, cumulative reward: -144.53277070995222
Episode: 277/1000, score: 888, cumulative reward: -269.8032714213234
Episode: 279/1000, score: 312, cumulative reward: -188.32216062080522
Episode: 280/1000, score: 382, cumulative reward: -282.7989078060258
Episode: 281/1000, score: 592, cumulative reward: -177.02471682085422
Episode: 283/1000, score: 810, cumulative reward: -316.9004070236288
Episode: 285/1000, score: 230, cumulative reward: -142.37452327100962
Episode: 286/1000, score: 316, cumulative reward: -142.51723134149873
Episode: 287/1000, score: 192, cumulative reward: -252.72116738700777
Episode: 288/1000, score: 124, cumulative reward: -160.95863350295275
Episode: 289/1000, score: 448, cumulative reward: -232.24846576169935
Episode: 290/1000, score: 252, cumulative reward: -192.23062804717978
Episode: 291/1000, score: 382, cumulative reward: -222.81403084713284
Episode: 292/1000, score: 207, cumulative reward: -194.31829059068488
Episode: 293/1000, score: 97, cumulative reward: -140.53402772086486
Episode: 294/1000, score: 125, cumulative reward: -200.12121731793786
Episode: 295/1000, score: 97, cumulative reward: -202.735589275893
Episode: 296/1000, score: 214, cumulative reward: -311.5929859412464
Episode: 297/1000, score: 226, cumulative reward: -347.0186356564085
Episode: 298/1000, score: 164, cumulative reward: -24.58503898905144
Episode: 299/1000, score: 159, cumulative reward: -310.38627924735727
Episode: 300/1000, score: 225, cumulative reward: -23.548514829836535
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 301/1000, score: 84, cumulative reward: -355.0367294965098
Episode: 302/1000, score: 398, cumulative reward: -28.59291569450825
Episode: 303/1000, score: 117, cumulative reward: -281.77255054968316
Episode: 304/1000, score: 143, cumulative reward: -250.3878538856328
Episode: 305/1000, score: 99, cumulative reward: -205.533357585945
Episode: 306/1000, score: 243, cumulative reward: -171.84786909837834
Episode: 307/1000, score: 230, cumulative reward: -223.4899866754235
Episode: 308/1000, score: 204, cumulative reward: -153.70927956525213
Episode: 309/1000, score: 99, cumulative reward: -104.91961398793084
Episode: 310/1000, score: 154, cumulative reward: -99.38468796184318
Episode: 311/1000, score: 107, cumulative reward: -265.49795304824585
Episode: 312/1000, score: 144, cumulative reward: -134.9746631461316
Episode: 313/1000, score: 302, cumulative reward: -203.08060106073827
Episode: 314/1000, score: 197, cumulative reward: -212.3427673933859
Episode: 315/1000, score: 507, cumulative reward: -117.5483086648896
Episode: 316/1000, score: 253, cumulative reward: -359.2651373599576
Episode: 318/1000, score: 430, cumulative reward: -189.66212076416076
Episode: 319/1000, score: 238, cumulative reward: -169.4224566721304
Episode: 320/1000, score: 279, cumulative reward: -123.6131259579721
Episode: 321/1000, score: 672, cumulative reward: -203.43507565506061
Episode: 322/1000, score: 398, cumulative reward: -195.8129358386638
Episode: 323/1000, score: 583, cumulative reward: -251.60399127922955
Episode: 324/1000, score: 339, cumulative reward: -207.2713551313878
Episode: 327/1000, score: 312, cumulative reward: -119.24993971238412
Episode: 328/1000, score: 445, cumulative reward: -199.86505237061147
Episode: 331/1000, score: 817, cumulative reward: -284.0397360225487
Episode: 332/1000, score: 819, cumulative reward: -196.81181467060492
Episode: 333/1000, score: 999, cumulative reward: -301.5073377412434
Episode: 342/1000, score: 604, cumulative reward: -237.3789418067697
Episode: 344/1000, score: 112, cumulative reward: -273.48757807713275
Episode: 346/1000, score: 406, cumulative reward: -193.5222172739887
Episode: 347/1000, score: 69, cumulative reward: -153.056433055176
Episode: 348/1000, score: 72, cumulative reward: -456.7831330470756
Episode: 349/1000, score: 63, cumulative reward: -592.0952220567151
Episode: 350/1000, score: 76, cumulative reward: -501.3494522937216
Episode: 351/1000, score: 66, cumulative reward: -570.8313359926051
Episode: 352/1000, score: 51, cumulative reward: -433.34069080323366
Episode: 353/1000, score: 75, cumulative reward: -573.2552510446487
Episode: 354/1000, score: 56, cumulative reward: -479.7729126010234
Episode: 355/1000, score: 79, cumulative reward: -406.0026744965672
Episode: 356/1000, score: 76, cumulative reward: -448.6140005412209
Episode: 357/1000, score: 64, cumulative reward: -266.2497604359954
Episode: 358/1000, score: 55, cumulative reward: -151.63731844445186
Episode: 359/1000, score: 54, cumulative reward: -161.60520634542607
Episode: 360/1000, score: 75, cumulative reward: -211.74793216864393
Episode: 361/1000, score: 61, cumulative reward: -25.62707663433153
Episode: 362/1000, score: 60, cumulative reward: -384.993706746335
Episode: 363/1000, score: 91, cumulative reward: -214.03073836693204
Episode: 364/1000, score: 104, cumulative reward: -251.63451537159733
Episode: 365/1000, score: 112, cumulative reward: -507.5284191310727
Episode: 366/1000, score: 83, cumulative reward: -201.5219873310886
Episode: 367/1000, score: 127, cumulative reward: -228.17144875863144
Episode: 368/1000, score: 63, cumulative reward: -222.30182846167799
Episode: 369/1000, score: 134, cumulative reward: -357.032731283635
Episode: 370/1000, score: 63, cumulative reward: -147.29061543749324
Episode: 371/1000, score: 75, cumulative reward: -310.37065669392945
Episode: 372/1000, score: 61, cumulative reward: 15.36672100137551
Episode: 373/1000, score: 79, cumulative reward: -427.15487960603014
Episode: 374/1000, score: 77, cumulative reward: -326.43376678337887
Episode: 375/1000, score: 105, cumulative reward: -134.05248540080885
Episode: 376/1000, score: 95, cumulative reward: -297.6096103016373
Episode: 377/1000, score: 82, cumulative reward: -221.66382652170444
Episode: 378/1000, score: 95, cumulative reward: -225.58640750660308
Episode: 379/1000, score: 96, cumulative reward: -294.3365205529217
Episode: 380/1000, score: 83, cumulative reward: -205.8895073685401
Episode: 381/1000, score: 75, cumulative reward: -217.89154010809457
Episode: 382/1000, score: 67, cumulative reward: -323.9116605137352
Episode: 383/1000, score: 72, cumulative reward: -322.5870542779007
Episode: 384/1000, score: 71, cumulative reward: -270.8449394096194
Episode: 385/1000, score: 59, cumulative reward: -109.3710883520024
Episode: 386/1000, score: 82, cumulative reward: -72.27646671094158
Episode: 387/1000, score: 101, cumulative reward: -191.88581490428095
Episode: 388/1000, score: 67, cumulative reward: -138.68713021372758
Episode: 389/1000, score: 67, cumulative reward: -139.91695169443278
Episode: 390/1000, score: 88, cumulative reward: -117.59168899655067
Episode: 391/1000, score: 50, cumulative reward: -96.35159336058176
Episode: 392/1000, score: 82, cumulative reward: -263.45982369018975
Episode: 393/1000, score: 123, cumulative reward: -96.15939056500996
Episode: 394/1000, score: 118, cumulative reward: -301.64786792783264
Episode: 395/1000, score: 52, cumulative reward: -96.73066193860141
Episode: 396/1000, score: 152, cumulative reward: -188.58980943563105
Episode: 397/1000, score: 57, cumulative reward: -77.15707679231895
Episode: 398/1000, score: 81, cumulative reward: -172.9610878490975
Episode: 399/1000, score: 78, cumulative reward: -123.62023765745228
Episode: 400/1000, score: 49, cumulative reward: -100.99491109221836
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 401/1000, score: 127, cumulative reward: -177.33454455235082
Episode: 402/1000, score: 118, cumulative reward: -197.19036272285132
Episode: 403/1000, score: 76, cumulative reward: -97.80045195333243
Episode: 404/1000, score: 53, cumulative reward: -125.82990739578058
Episode: 405/1000, score: 60, cumulative reward: -127.05544264198001
Episode: 406/1000, score: 72, cumulative reward: -206.37612116452806
Episode: 407/1000, score: 73, cumulative reward: -124.34353477039022
Episode: 408/1000, score: 98, cumulative reward: -339.37202500042906
Episode: 409/1000, score: 64, cumulative reward: -66.32336372104473
Episode: 410/1000, score: 92, cumulative reward: -207.73716778250545
Episode: 411/1000, score: 82, cumulative reward: -243.707193130759
Episode: 412/1000, score: 57, cumulative reward: -282.77727317213385
Episode: 413/1000, score: 75, cumulative reward: -135.804238627164
Episode: 414/1000, score: 78, cumulative reward: -138.89094407309747
Episode: 415/1000, score: 100, cumulative reward: -165.69344528951484
Episode: 416/1000, score: 95, cumulative reward: -352.1527726593441
Episode: 417/1000, score: 68, cumulative reward: -101.80432924017403
Episode: 418/1000, score: 83, cumulative reward: -157.27914936798726
Episode: 419/1000, score: 85, cumulative reward: -263.09158201348737
Episode: 420/1000, score: 130, cumulative reward: -180.52708120910734
Episode: 421/1000, score: 119, cumulative reward: -235.26241458889487
Episode: 422/1000, score: 83, cumulative reward: -146.23534414443276
Episode: 423/1000, score: 102, cumulative reward: -87.7688048180501
Episode: 424/1000, score: 90, cumulative reward: -223.7969842751792
Episode: 425/1000, score: 71, cumulative reward: -183.20029246110656
Episode: 426/1000, score: 86, cumulative reward: -145.58383201127353
Episode: 427/1000, score: 79, cumulative reward: -105.13524068864356
Episode: 428/1000, score: 58, cumulative reward: -114.42673918706802
Episode: 429/1000, score: 92, cumulative reward: -84.32106339716914
Episode: 430/1000, score: 52, cumulative reward: -112.8882612120241
Episode: 431/1000, score: 67, cumulative reward: -129.88875123429662
Episode: 432/1000, score: 57, cumulative reward: -103.1203030076552
Episode: 433/1000, score: 106, cumulative reward: -93.10589412434314
Episode: 434/1000, score: 84, cumulative reward: -118.45266321080355
Episode: 435/1000, score: 66, cumulative reward: -230.1895269290828
Episode: 436/1000, score: 103, cumulative reward: -180.80339927305204
Episode: 437/1000, score: 109, cumulative reward: -85.21540632916387
Episode: 438/1000, score: 84, cumulative reward: -248.19868064674932
Episode: 439/1000, score: 120, cumulative reward: -222.61082364799609
Episode: 440/1000, score: 194, cumulative reward: -75.31624108985432
Episode: 441/1000, score: 69, cumulative reward: -116.88684802572575
Episode: 442/1000, score: 76, cumulative reward: -59.789818381445315
Episode: 443/1000, score: 155, cumulative reward: -159.20591205848172
Episode: 444/1000, score: 75, cumulative reward: -140.88014609049108
Episode: 445/1000, score: 133, cumulative reward: -144.67969205893402
Episode: 446/1000, score: 111, cumulative reward: -130.44685650445805
Episode: 447/1000, score: 103, cumulative reward: -133.6132165352394
Episode: 448/1000, score: 79, cumulative reward: -80.83389413459243
Episode: 449/1000, score: 108, cumulative reward: -183.36198438524636
Episode: 450/1000, score: 130, cumulative reward: -135.37535998180385
Episode: 451/1000, score: 103, cumulative reward: 26.7713822893718
Episode: 452/1000, score: 77, cumulative reward: -108.81921310596532
Episode: 453/1000, score: 94, cumulative reward: -152.63705627179405
Episode: 454/1000, score: 134, cumulative reward: -91.58743265185855
Episode: 455/1000, score: 190, cumulative reward: -9.613882287974022
Episode: 456/1000, score: 159, cumulative reward: -465.3342447764556
Episode: 457/1000, score: 144, cumulative reward: -188.85400568408852
Episode: 458/1000, score: 54, cumulative reward: -81.68153079759833
Episode: 459/1000, score: 111, cumulative reward: -137.16643905498324
Episode: 460/1000, score: 188, cumulative reward: -164.43501445240918
Episode: 461/1000, score: 69, cumulative reward: -272.68232005870095
Episode: 462/1000, score: 72, cumulative reward: -84.58956628478077
Episode: 463/1000, score: 79, cumulative reward: -507.04683270669335
Episode: 464/1000, score: 93, cumulative reward: -522.8990940014307
Episode: 465/1000, score: 95, cumulative reward: -114.30459475311298
Episode: 466/1000, score: 158, cumulative reward: -74.05727501204598
Episode: 467/1000, score: 151, cumulative reward: -95.47937818402532
Episode: 468/1000, score: 112, cumulative reward: -612.4468931361868
Episode: 469/1000, score: 84, cumulative reward: -252.4017546389705
Episode: 470/1000, score: 98, cumulative reward: -314.75469055810584
Episode: 471/1000, score: 89, cumulative reward: -169.7235425937278
Episode: 472/1000, score: 94, cumulative reward: -313.91738136766753
Episode: 473/1000, score: 65, cumulative reward: -118.26562323711704
Episode: 474/1000, score: 84, cumulative reward: -76.48133179461112
Episode: 475/1000, score: 92, cumulative reward: -199.84768537024925
Episode: 476/1000, score: 162, cumulative reward: -234.3310869924374
Episode: 477/1000, score: 61, cumulative reward: -141.6915781446876
Episode: 478/1000, score: 134, cumulative reward: -95.83130869811814
Episode: 479/1000, score: 71, cumulative reward: -293.9450251671659
Episode: 480/1000, score: 108, cumulative reward: -112.81260570444793
Episode: 481/1000, score: 103, cumulative reward: -140.0079788534832
Episode: 482/1000, score: 144, cumulative reward: -274.0784744221445
Episode: 483/1000, score: 86, cumulative reward: -327.77807135183195
Episode: 484/1000, score: 77, cumulative reward: -331.31883340204774
Episode: 485/1000, score: 83, cumulative reward: -168.44740671687964
Episode: 486/1000, score: 87, cumulative reward: -214.50996190316584
Episode: 487/1000, score: 100, cumulative reward: -62.70093016321059
Episode: 488/1000, score: 78, cumulative reward: -166.58877868406427
Episode: 489/1000, score: 126, cumulative reward: -193.84212546313313
Episode: 490/1000, score: 71, cumulative reward: -142.0058021746518
Episode: 491/1000, score: 106, cumulative reward: -49.009153341705684
Episode: 492/1000, score: 66, cumulative reward: -130.98486481083594
Episode: 493/1000, score: 76, cumulative reward: -249.03227137812922
Episode: 494/1000, score: 122, cumulative reward: -382.498951741017
Episode: 495/1000, score: 71, cumulative reward: -54.69913345186597
Episode: 496/1000, score: 71, cumulative reward: -190.09157444970543
Episode: 497/1000, score: 103, cumulative reward: -491.73103575423926
Episode: 498/1000, score: 72, cumulative reward: -142.59663051670043
Episode: 499/1000, score: 97, cumulative reward: -104.6150911833184
Episode: 500/1000, score: 90, cumulative reward: -403.7230949110932
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 501/1000, score: 82, cumulative reward: -136.45749665934002
Episode: 502/1000, score: 65, cumulative reward: -138.77080814464836
Episode: 503/1000, score: 55, cumulative reward: -78.36670772818098
Episode: 504/1000, score: 64, cumulative reward: -96.34051841938168
Episode: 505/1000, score: 140, cumulative reward: -430.6642275835693
Episode: 506/1000, score: 91, cumulative reward: -100.85167284928116
Episode: 507/1000, score: 86, cumulative reward: -315.8376158438706
Episode: 508/1000, score: 175, cumulative reward: -385.41358001227456
Episode: 509/1000, score: 102, cumulative reward: -124.41434529893266
Episode: 510/1000, score: 59, cumulative reward: -218.0276121287548
Episode: 511/1000, score: 127, cumulative reward: -410.4310184811362
Episode: 512/1000, score: 116, cumulative reward: -599.6414299714877
Episode: 513/1000, score: 85, cumulative reward: -167.22841812921288
Episode: 514/1000, score: 93, cumulative reward: -11.295398546056163
Episode: 515/1000, score: 72, cumulative reward: -76.07356571335936
Episode: 516/1000, score: 63, cumulative reward: -217.0427790358855
Episode: 517/1000, score: 300, cumulative reward: -147.56358829994628
Episode: 518/1000, score: 85, cumulative reward: -168.4380350510707
Episode: 519/1000, score: 99, cumulative reward: -85.23659736970733
Episode: 520/1000, score: 116, cumulative reward: -57.38461701637423
Episode: 521/1000, score: 110, cumulative reward: -462.7551399086079
Episode: 522/1000, score: 75, cumulative reward: -153.246728013015
Episode: 523/1000, score: 79, cumulative reward: -7.403940350301397
Episode: 524/1000, score: 116, cumulative reward: -67.36919307569124
Episode: 525/1000, score: 101, cumulative reward: -100.36215090546567
Episode: 526/1000, score: 111, cumulative reward: 5.1603869632602795
Episode: 527/1000, score: 123, cumulative reward: -278.1393916618117
Episode: 528/1000, score: 52, cumulative reward: -200.31328866758366
Episode: 529/1000, score: 103, cumulative reward: -110.16555811198158
Episode: 530/1000, score: 82, cumulative reward: -332.23967104584335
Episode: 531/1000, score: 123, cumulative reward: -389.41056493173517
Episode: 532/1000, score: 66, cumulative reward: -523.5009508068729
Episode: 533/1000, score: 81, cumulative reward: -525.4295896364239
Episode: 534/1000, score: 81, cumulative reward: -588.1569635971621
Episode: 535/1000, score: 104, cumulative reward: -115.90328235957156
Episode: 536/1000, score: 105, cumulative reward: -177.25107479163722
Episode: 537/1000, score: 78, cumulative reward: -406.9642029548344
Episode: 538/1000, score: 127, cumulative reward: -396.394666825448
Episode: 539/1000, score: 108, cumulative reward: -204.0386288098
Episode: 540/1000, score: 112, cumulative reward: -319.3733045199233
Episode: 541/1000, score: 229, cumulative reward: -183.52716561880843
Episode: 542/1000, score: 76, cumulative reward: -217.51154416367706
Episode: 543/1000, score: 75, cumulative reward: -433.9428086753317
Episode: 544/1000, score: 205, cumulative reward: -329.97724073783957
Episode: 545/1000, score: 106, cumulative reward: 39.41149912711683
Episode: 546/1000, score: 79, cumulative reward: -438.69186553509803
Episode: 547/1000, score: 93, cumulative reward: -300.6951929828642
Episode: 548/1000, score: 96, cumulative reward: -263.60869535221065
Episode: 549/1000, score: 110, cumulative reward: -72.52605587107585
Episode: 550/1000, score: 169, cumulative reward: -296.4096191001922
Episode: 551/1000, score: 91, cumulative reward: -215.0043735384504
Episode: 552/1000, score: 71, cumulative reward: -50.6989149254359
Episode: 553/1000, score: 87, cumulative reward: -253.96082220395914
Episode: 554/1000, score: 93, cumulative reward: -216.57088690235668
Episode: 555/1000, score: 169, cumulative reward: -8.731987352033315
Episode: 556/1000, score: 132, cumulative reward: -183.54129964319094
Episode: 557/1000, score: 58, cumulative reward: -371.25580070842284
Episode: 558/1000, score: 337, cumulative reward: -40.38607231437612
Episode: 559/1000, score: 291, cumulative reward: -331.47426721686924
Episode: 560/1000, score: 111, cumulative reward: -294.2781004561607
Episode: 561/1000, score: 108, cumulative reward: -294.75981025840065
Episode: 562/1000, score: 122, cumulative reward: -129.7011630318343
Episode: 563/1000, score: 148, cumulative reward: -290.56086042860215
Episode: 564/1000, score: 101, cumulative reward: -314.62747119560964
Episode: 565/1000, score: 154, cumulative reward: -87.86636781066206
Episode: 566/1000, score: 173, cumulative reward: -88.89875804311748
Episode: 567/1000, score: 243, cumulative reward: -300.2053519708577
Episode: 568/1000, score: 82, cumulative reward: -472.8328059953616
Episode: 569/1000, score: 109, cumulative reward: -264.0279626766161
Episode: 570/1000, score: 136, cumulative reward: -200.19020703028866
Episode: 571/1000, score: 73, cumulative reward: -60.61370817326208
Episode: 572/1000, score: 86, cumulative reward: -235.3425453204265
Episode: 573/1000, score: 78, cumulative reward: -251.2834247522461
Episode: 574/1000, score: 117, cumulative reward: -5.019003486016544
Episode: 575/1000, score: 110, cumulative reward: -170.7276807859321
Episode: 576/1000, score: 132, cumulative reward: -56.6732781222386
Episode: 577/1000, score: 135, cumulative reward: -308.022420045187
Episode: 578/1000, score: 73, cumulative reward: -526.7061648555821
Episode: 579/1000, score: 112, cumulative reward: -110.76235303113378
Episode: 580/1000, score: 94, cumulative reward: -214.2394636921511
Episode: 581/1000, score: 66, cumulative reward: -30.68774522722798
Episode: 582/1000, score: 91, cumulative reward: -531.2883712455437
Episode: 583/1000, score: 73, cumulative reward: -109.30922735121831
Episode: 584/1000, score: 117, cumulative reward: -181.8849962911611
Episode: 585/1000, score: 82, cumulative reward: -270.0109392176954
Episode: 586/1000, score: 69, cumulative reward: -525.337067168539
Episode: 587/1000, score: 98, cumulative reward: -220.73174828805966
Episode: 588/1000, score: 113, cumulative reward: -14.716701884947582
Episode: 589/1000, score: 91, cumulative reward: -222.22440363637577
Episode: 590/1000, score: 88, cumulative reward: -103.22039798501989
Episode: 591/1000, score: 70, cumulative reward: -203.06568837222696
Episode: 592/1000, score: 80, cumulative reward: -126.61884482744597
Episode: 593/1000, score: 146, cumulative reward: -176.21663148353298
Episode: 594/1000, score: 106, cumulative reward: 28.624438357770828
Episode: 595/1000, score: 111, cumulative reward: -301.3882211176188
Episode: 596/1000, score: 58, cumulative reward: -268.8444459279765
Episode: 597/1000, score: 55, cumulative reward: -122.26390150587416
Episode: 598/1000, score: 130, cumulative reward: -140.821074685683
Episode: 599/1000, score: 102, cumulative reward: -121.42980660485175
Episode: 600/1000, score: 158, cumulative reward: -101.02054800329243
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 601/1000, score: 107, cumulative reward: -218.0326103645114
Episode: 602/1000, score: 58, cumulative reward: -145.2564003930995
Episode: 603/1000, score: 83, cumulative reward: -35.50118174967591
Episode: 604/1000, score: 71, cumulative reward: -105.56453631694608
Episode: 605/1000, score: 91, cumulative reward: -34.95098280912242
Episode: 606/1000, score: 118, cumulative reward: -87.98291846458783
Episode: 607/1000, score: 97, cumulative reward: -114.52850693575166
Episode: 608/1000, score: 94, cumulative reward: -139.17907126134975
Episode: 609/1000, score: 89, cumulative reward: -219.9505439232759
Episode: 610/1000, score: 99, cumulative reward: -120.7094099543355
Episode: 611/1000, score: 171, cumulative reward: -186.13517451976787
Episode: 612/1000, score: 59, cumulative reward: -248.48102701643967
Episode: 613/1000, score: 54, cumulative reward: -185.9214665233606
Episode: 614/1000, score: 121, cumulative reward: -93.82702577762382
Episode: 615/1000, score: 62, cumulative reward: -219.35773073767393
Episode: 616/1000, score: 183, cumulative reward: -173.18614450436675
Episode: 617/1000, score: 76, cumulative reward: -101.2515099703119
Episode: 618/1000, score: 54, cumulative reward: -119.7078742472082
Episode: 619/1000, score: 84, cumulative reward: -101.25161151833113
Episode: 620/1000, score: 62, cumulative reward: -92.24779654127337
Episode: 621/1000, score: 67, cumulative reward: -188.4817486656745
Episode: 622/1000, score: 137, cumulative reward: -196.75507497618622
Episode: 623/1000, score: 60, cumulative reward: -70.8458789374502
Episode: 624/1000, score: 179, cumulative reward: -126.1679438145171
Episode: 625/1000, score: 98, cumulative reward: -210.14773599974046
Episode: 626/1000, score: 114, cumulative reward: -117.01436815506183
Episode: 627/1000, score: 113, cumulative reward: -73.06094621955455
Episode: 628/1000, score: 131, cumulative reward: -292.2911656856299
Episode: 629/1000, score: 91, cumulative reward: -172.88510675091902
Episode: 630/1000, score: 85, cumulative reward: -58.94265190873471
Episode: 631/1000, score: 129, cumulative reward: -99.11133873771382
Episode: 632/1000, score: 64, cumulative reward: -7.52048475724466
Episode: 633/1000, score: 98, cumulative reward: -111.70948311881509
Episode: 634/1000, score: 78, cumulative reward: -120.1253168586921
Episode: 635/1000, score: 76, cumulative reward: -511.04204720144855
Episode: 636/1000, score: 147, cumulative reward: -292.2199903902762
Episode: 637/1000, score: 51, cumulative reward: -211.40514435111223
Episode: 638/1000, score: 106, cumulative reward: -559.9766421401862
Episode: 639/1000, score: 90, cumulative reward: -378.67298273720104
Episode: 640/1000, score: 80, cumulative reward: -211.26430176652465
Episode: 641/1000, score: 65, cumulative reward: -106.42149379123181
Episode: 642/1000, score: 136, cumulative reward: -107.40095609371693
Episode: 643/1000, score: 105, cumulative reward: -260.02586483775485
Episode: 644/1000, score: 139, cumulative reward: -159.18456491399127
Episode: 645/1000, score: 121, cumulative reward: -150.68245908455998
Episode: 646/1000, score: 61, cumulative reward: -93.87716387241431
Episode: 647/1000, score: 117, cumulative reward: -228.6309057234472
Episode: 648/1000, score: 102, cumulative reward: -213.76579957682708
Episode: 649/1000, score: 98, cumulative reward: -124.217828143345
Episode: 650/1000, score: 130, cumulative reward: -184.12846511071768
Episode: 651/1000, score: 117, cumulative reward: -103.4085724003138
Episode: 652/1000, score: 132, cumulative reward: -117.20568963584394
Episode: 653/1000, score: 94, cumulative reward: -175.34568981280512
Episode: 654/1000, score: 95, cumulative reward: -163.9114959306409
Episode: 655/1000, score: 77, cumulative reward: -112.63975142849787
Episode: 656/1000, score: 160, cumulative reward: -237.1716124945427
Episode: 657/1000, score: 143, cumulative reward: -121.56846473664946
Episode: 658/1000, score: 88, cumulative reward: -166.78415176327923
Episode: 659/1000, score: 107, cumulative reward: -491.15480255108037
Episode: 660/1000, score: 98, cumulative reward: -70.0593863683614
Episode: 661/1000, score: 57, cumulative reward: -93.95322759984765
Episode: 662/1000, score: 89, cumulative reward: -175.70734411055736
Episode: 663/1000, score: 105, cumulative reward: -130.17267625444086
Episode: 664/1000, score: 58, cumulative reward: -312.142539319337
Episode: 665/1000, score: 67, cumulative reward: -115.48084634470214
Episode: 666/1000, score: 129, cumulative reward: -280.64199556483527
Episode: 667/1000, score: 106, cumulative reward: -226.18137301377195
Episode: 668/1000, score: 152, cumulative reward: -204.1602810356604
Episode: 669/1000, score: 155, cumulative reward: -522.0024248793928
Episode: 670/1000, score: 72, cumulative reward: -333.2557849388453
Episode: 671/1000, score: 117, cumulative reward: -201.05030497923246
Episode: 672/1000, score: 146, cumulative reward: -247.3185540524038
Episode: 673/1000, score: 94, cumulative reward: -5.382263174532227
Episode: 674/1000, score: 125, cumulative reward: -374.4298605676687
Episode: 675/1000, score: 145, cumulative reward: -247.00960311788293
Episode: 676/1000, score: 181, cumulative reward: -232.95068649837353
Episode: 677/1000, score: 115, cumulative reward: -140.98187166175722
Episode: 678/1000, score: 115, cumulative reward: -232.7383237606115
Episode: 679/1000, score: 190, cumulative reward: -472.3980268915965
Episode: 680/1000, score: 100, cumulative reward: -3.485907707805694
Episode: 681/1000, score: 76, cumulative reward: -199.03664478557283
Episode: 682/1000, score: 81, cumulative reward: -111.84086737115888
Episode: 683/1000, score: 109, cumulative reward: -65.79740479190647
Episode: 684/1000, score: 73, cumulative reward: -102.81289265468608
Episode: 685/1000, score: 74, cumulative reward: -78.58513502454358
Episode: 686/1000, score: 94, cumulative reward: -92.95123585747383
Episode: 687/1000, score: 209, cumulative reward: -11.78860540883285
Episode: 688/1000, score: 137, cumulative reward: -5.700619107386899
Episode: 689/1000, score: 75, cumulative reward: -99.48599240319366
Episode: 690/1000, score: 67, cumulative reward: -127.35310963248372
Episode: 691/1000, score: 80, cumulative reward: -166.6543801549107
Episode: 692/1000, score: 295, cumulative reward: -182.63138628332712
Episode: 693/1000, score: 100, cumulative reward: -450.62252213321074
Episode: 694/1000, score: 73, cumulative reward: -95.9561360333627
Episode: 695/1000, score: 103, cumulative reward: -89.11608826772532
Episode: 696/1000, score: 95, cumulative reward: -91.70552795436602
Episode: 697/1000, score: 88, cumulative reward: -425.79744173715096
Episode: 698/1000, score: 59, cumulative reward: -73.24423078041116
Episode: 699/1000, score: 71, cumulative reward: -154.77087933504646
Episode: 700/1000, score: 85, cumulative reward: -814.3354627139794
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 701/1000, score: 96, cumulative reward: -39.973985071009835
Episode: 702/1000, score: 91, cumulative reward: -418.9541457546992
Episode: 703/1000, score: 95, cumulative reward: -64.35815179791216
Episode: 704/1000, score: 81, cumulative reward: -48.17603391599731
Episode: 705/1000, score: 83, cumulative reward: -166.65236964711923
Episode: 706/1000, score: 66, cumulative reward: -116.67158410848617
Episode: 707/1000, score: 119, cumulative reward: -16.947725678944835
Episode: 708/1000, score: 85, cumulative reward: -199.29803440609314
Episode: 709/1000, score: 258, cumulative reward: -183.15381004956197
Episode: 710/1000, score: 119, cumulative reward: -42.232957881328275
Episode: 711/1000, score: 118, cumulative reward: -215.84996202349163
Episode: 712/1000, score: 53, cumulative reward: -72.1498555702427
Episode: 713/1000, score: 65, cumulative reward: -199.09724808536157
Episode: 714/1000, score: 150, cumulative reward: -262.7804947836827
Episode: 715/1000, score: 94, cumulative reward: -349.0989552945641
Episode: 716/1000, score: 101, cumulative reward: -142.4159121152789
Episode: 717/1000, score: 110, cumulative reward: -75.95826705315314
Episode: 718/1000, score: 55, cumulative reward: -199.95824918167241
Episode: 719/1000, score: 87, cumulative reward: -198.06153203003447
Episode: 720/1000, score: 83, cumulative reward: -152.1952537257933
Episode: 721/1000, score: 99, cumulative reward: -76.30604067332686
Episode: 722/1000, score: 107, cumulative reward: -0.1622972140016401
Episode: 723/1000, score: 83, cumulative reward: -51.13568324698137
Episode: 724/1000, score: 51, cumulative reward: -86.4807112789159
Episode: 725/1000, score: 54, cumulative reward: -136.93573305686874
Episode: 726/1000, score: 80, cumulative reward: -265.91656064131723
Episode: 727/1000, score: 81, cumulative reward: -127.58408499485469
Episode: 728/1000, score: 106, cumulative reward: -178.71210621992537
Episode: 729/1000, score: 60, cumulative reward: -268.5758141954027
Episode: 730/1000, score: 74, cumulative reward: 54.08585455713518
Episode: 731/1000, score: 85, cumulative reward: -93.58041802097168
Episode: 732/1000, score: 140, cumulative reward: -243.11120074720046
Episode: 733/1000, score: 92, cumulative reward: -131.26844801229822
Episode: 734/1000, score: 76, cumulative reward: -116.59035315316135
Episode: 735/1000, score: 143, cumulative reward: -172.6501000332542
Episode: 736/1000, score: 91, cumulative reward: -319.6250272456718
Episode: 737/1000, score: 282, cumulative reward: -73.12850820420948
Episode: 738/1000, score: 161, cumulative reward: -58.52525513265503
Episode: 739/1000, score: 83, cumulative reward: -389.46999962079497
Episode: 740/1000, score: 75, cumulative reward: -67.12823464912461
Episode: 741/1000, score: 136, cumulative reward: -281.83876358306077
Episode: 742/1000, score: 223, cumulative reward: -39.22262462945068
Episode: 743/1000, score: 368, cumulative reward: -7.464592746880641
Episode: 744/1000, score: 95, cumulative reward: -156.0322259556924
Episode: 745/1000, score: 315, cumulative reward: -48.66917688822289
Episode: 746/1000, score: 613, cumulative reward: -305.1889372919426
Episode: 747/1000, score: 404, cumulative reward: -354.79899889355687
Episode: 748/1000, score: 94, cumulative reward: -184.17319685544436
Episode: 749/1000, score: 158, cumulative reward: -775.8082835512652
Episode: 750/1000, score: 137, cumulative reward: -54.694368555940116
Episode: 751/1000, score: 75, cumulative reward: -130.58731251417854
Episode: 752/1000, score: 78, cumulative reward: -20.088262102945905
Episode: 753/1000, score: 273, cumulative reward: -556.48511591419
Episode: 754/1000, score: 54, cumulative reward: -309.6683755257019
Episode: 755/1000, score: 96, cumulative reward: -458.48446469662144
Episode: 756/1000, score: 322, cumulative reward: -432.4901804577057
Episode: 757/1000, score: 148, cumulative reward: -518.0162886836065
Episode: 758/1000, score: 95, cumulative reward: -443.22073840860924
Episode: 759/1000, score: 120, cumulative reward: -80.03591124823791
Episode: 760/1000, score: 133, cumulative reward: -299.1269256441917
Episode: 761/1000, score: 287, cumulative reward: -353.5690984428557
Episode: 762/1000, score: 337, cumulative reward: -314.55624187483284
Episode: 763/1000, score: 264, cumulative reward: -65.12782883627932
Episode: 764/1000, score: 162, cumulative reward: -317.3376505173657
Episode: 765/1000, score: 118, cumulative reward: -136.7652023733225
Episode: 766/1000, score: 151, cumulative reward: -10.29205599888094
Episode: 767/1000, score: 158, cumulative reward: -493.10333634543673
Episode: 768/1000, score: 109, cumulative reward: -113.49397018242695
Episode: 769/1000, score: 168, cumulative reward: -299.209856186337
Episode: 770/1000, score: 119, cumulative reward: -214.74468504523767
Episode: 771/1000, score: 227, cumulative reward: -453.27148516350377
Episode: 772/1000, score: 111, cumulative reward: -19.527627937592086
Episode: 773/1000, score: 107, cumulative reward: -228.21850969471916
Episode: 774/1000, score: 148, cumulative reward: -179.2418481069906
Episode: 775/1000, score: 92, cumulative reward: -273.5500451871743
Episode: 776/1000, score: 175, cumulative reward: -84.21665350850867
Episode: 777/1000, score: 126, cumulative reward: -143.21992796648755
Episode: 778/1000, score: 165, cumulative reward: -200.4001718262649
Episode: 779/1000, score: 80, cumulative reward: -443.02655376314686
Episode: 780/1000, score: 71, cumulative reward: -96.30100460397418
Episode: 781/1000, score: 217, cumulative reward: -243.25542861076988
Episode: 782/1000, score: 108, cumulative reward: -160.26167667416388
Episode: 783/1000, score: 105, cumulative reward: -78.04548894941738
Episode: 784/1000, score: 141, cumulative reward: 51.52021542134389
Episode: 785/1000, score: 96, cumulative reward: -15.766238225078112
Episode: 786/1000, score: 127, cumulative reward: -150.7355734637117
Episode: 787/1000, score: 61, cumulative reward: -69.36612085129939
Episode: 788/1000, score: 92, cumulative reward: -149.11788608765409
Episode: 789/1000, score: 80, cumulative reward: -363.0284420822761
Episode: 790/1000, score: 68, cumulative reward: -209.0242595891762
Episode: 791/1000, score: 132, cumulative reward: -49.71561467910964
Episode: 792/1000, score: 87, cumulative reward: -116.53645212345607
Episode: 793/1000, score: 65, cumulative reward: -333.8032411690766
Episode: 794/1000, score: 87, cumulative reward: -117.70858972447833
Episode: 795/1000, score: 57, cumulative reward: -318.3289133467543
Episode: 796/1000, score: 65, cumulative reward: -325.4511675547326
Episode: 797/1000, score: 72, cumulative reward: -142.86792239884448
Episode: 798/1000, score: 97, cumulative reward: 3.9656131187909693
Episode: 799/1000, score: 73, cumulative reward: -117.858646965591
Episode: 800/1000, score: 68, cumulative reward: -82.12749945102578
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 801/1000, score: 67, cumulative reward: -120.03747555006915
Episode: 802/1000, score: 89, cumulative reward: -106.59871449446615
Episode: 803/1000, score: 116, cumulative reward: -76.80978183153789
Episode: 804/1000, score: 65, cumulative reward: -83.16744372495327
Episode: 805/1000, score: 73, cumulative reward: -82.23840137552534
Episode: 806/1000, score: 67, cumulative reward: -177.8787105763285
Episode: 807/1000, score: 120, cumulative reward: 39.21853291322958
Episode: 808/1000, score: 160, cumulative reward: -373.12519414760243
Episode: 809/1000, score: 93, cumulative reward: -271.38078364605593
Episode: 810/1000, score: 534, cumulative reward: 184.2175265436946
Episode: 811/1000, score: 58, cumulative reward: -309.04666192469324
Episode: 812/1000, score: 59, cumulative reward: -316.95079873310533
Episode: 813/1000, score: 125, cumulative reward: -66.60091517777042
Episode: 814/1000, score: 61, cumulative reward: -571.1750404039432
Episode: 815/1000, score: 57, cumulative reward: -119.7413565550197
Episode: 816/1000, score: 89, cumulative reward: 58.25094580139992
Episode: 817/1000, score: 112, cumulative reward: -146.9684821388039
Episode: 818/1000, score: 85, cumulative reward: -374.33045812574744
Episode: 819/1000, score: 106, cumulative reward: -26.04639703957382
Episode: 820/1000, score: 89, cumulative reward: -387.1159820582075
Episode: 821/1000, score: 88, cumulative reward: -143.04006339899604
Episode: 822/1000, score: 121, cumulative reward: -392.3677565870071
Episode: 823/1000, score: 83, cumulative reward: -129.0566632785197
Episode: 824/1000, score: 135, cumulative reward: -34.941963090023634
Episode: 825/1000, score: 97, cumulative reward: -33.39141611617518
Episode: 826/1000, score: 88, cumulative reward: -221.7726298061834
Episode: 827/1000, score: 246, cumulative reward: -401.91709128099257
Episode: 828/1000, score: 92, cumulative reward: -278.0828475264374
Episode: 829/1000, score: 152, cumulative reward: -271.9975933133288
Episode: 830/1000, score: 106, cumulative reward: -64.18340877231714
Episode: 831/1000, score: 112, cumulative reward: -168.31695889962305
Episode: 832/1000, score: 96, cumulative reward: -139.5614659671998
Episode: 833/1000, score: 70, cumulative reward: -118.76086866113991
Episode: 834/1000, score: 53, cumulative reward: -104.63760022607858
Episode: 835/1000, score: 69, cumulative reward: -85.3506390632022
Episode: 836/1000, score: 123, cumulative reward: -65.57721752825279
Episode: 837/1000, score: 66, cumulative reward: -147.1244459639369
Episode: 838/1000, score: 87, cumulative reward: -69.88624433711647
Episode: 839/1000, score: 110, cumulative reward: -14.143494948994416
Episode: 840/1000, score: 99, cumulative reward: -88.68506288080471
Episode: 841/1000, score: 70, cumulative reward: -104.00971504538397
Episode: 842/1000, score: 80, cumulative reward: -66.50410343649298
Episode: 843/1000, score: 93, cumulative reward: -140.78702166194964
Episode: 844/1000, score: 85, cumulative reward: -181.77733427410033
Episode: 845/1000, score: 106, cumulative reward: -142.79593152412863
Episode: 846/1000, score: 337, cumulative reward: -221.1598590630469
Episode: 847/1000, score: 63, cumulative reward: -159.9212747797046
Episode: 848/1000, score: 138, cumulative reward: -144.88975616239688
Episode: 849/1000, score: 74, cumulative reward: -97.81676119733504
Episode: 850/1000, score: 89, cumulative reward: -111.91900737391343
Episode: 851/1000, score: 127, cumulative reward: -96.79302400316877
Episode: 852/1000, score: 143, cumulative reward: -110.59404758073364
Episode: 853/1000, score: 182, cumulative reward: -62.36233362626544
Episode: 854/1000, score: 251, cumulative reward: -129.19423060084554
Episode: 855/1000, score: 192, cumulative reward: -309.21535592492387
Episode: 856/1000, score: 102, cumulative reward: -115.36714867979116
Episode: 857/1000, score: 109, cumulative reward: -138.9918256171045
Episode: 858/1000, score: 194, cumulative reward: -89.90727203839047
Episode: 859/1000, score: 143, cumulative reward: -59.12752870777672
Episode: 860/1000, score: 111, cumulative reward: -108.5363153605819
Episode: 861/1000, score: 289, cumulative reward: -79.48445616341652
Episode: 862/1000, score: 338, cumulative reward: -92.9959494179733
Episode: 863/1000, score: 440, cumulative reward: -329.17004669731165
Episode: 864/1000, score: 167, cumulative reward: -296.83951557190494
Episode: 865/1000, score: 117, cumulative reward: -212.95848635994196
Episode: 866/1000, score: 170, cumulative reward: -207.76872884229778
Episode: 867/1000, score: 234, cumulative reward: -432.53778262987885
Episode: 868/1000, score: 94, cumulative reward: -283.4184452662903
Episode: 869/1000, score: 244, cumulative reward: -214.62953383415777
Episode: 870/1000, score: 85, cumulative reward: -497.9089193963961
Episode: 871/1000, score: 105, cumulative reward: -149.84544673281002
Episode: 872/1000, score: 70, cumulative reward: -530.5322587404004
Episode: 873/1000, score: 188, cumulative reward: -87.29798584628766
Episode: 874/1000, score: 124, cumulative reward: -53.8851802510685
Episode: 875/1000, score: 148, cumulative reward: -132.5848355251697
Episode: 876/1000, score: 287, cumulative reward: -155.80850782433902
Episode: 877/1000, score: 197, cumulative reward: -154.57270298988243
Episode: 878/1000, score: 104, cumulative reward: -109.63325546681362
Episode: 879/1000, score: 99, cumulative reward: -208.37254812410896
Episode: 880/1000, score: 65, cumulative reward: -196.10691892733269
Episode: 881/1000, score: 122, cumulative reward: -34.43103006515628
Episode: 882/1000, score: 86, cumulative reward: -70.21220697240292
Episode: 883/1000, score: 102, cumulative reward: -37.01387867375456
Episode: 884/1000, score: 149, cumulative reward: -79.49569453825029
Episode: 885/1000, score: 64, cumulative reward: -155.56400756961983
Episode: 886/1000, score: 472, cumulative reward: -233.04630068379092
Episode: 887/1000, score: 142, cumulative reward: -78.1449582149701
Episode: 888/1000, score: 102, cumulative reward: -22.450795873724857
Episode: 889/1000, score: 112, cumulative reward: -420.7618770077582
Episode: 890/1000, score: 95, cumulative reward: -112.47047671692339
Episode: 891/1000, score: 141, cumulative reward: -22.47942018702156
Episode: 892/1000, score: 219, cumulative reward: -206.3328930152772
Episode: 893/1000, score: 118, cumulative reward: -247.28543384531818
Episode: 894/1000, score: 223, cumulative reward: -285.0790692248436
Episode: 895/1000, score: 190, cumulative reward: -190.91448616630618
Episode: 896/1000, score: 120, cumulative reward: -201.64498002358707
Episode: 897/1000, score: 105, cumulative reward: -120.9909464902333
Episode: 898/1000, score: 161, cumulative reward: -218.31294479646547
Episode: 899/1000, score: 133, cumulative reward: -258.0019343212051
Episode: 900/1000, score: 81, cumulative reward: -214.62336239422234
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 901/1000, score: 59, cumulative reward: -176.17135348969674
Episode: 902/1000, score: 96, cumulative reward: -131.56181917152244
Episode: 903/1000, score: 179, cumulative reward: -88.9429951770123
Episode: 904/1000, score: 143, cumulative reward: -12.4555386945284
Episode: 905/1000, score: 116, cumulative reward: 20.53007946961924
Episode: 906/1000, score: 92, cumulative reward: -107.81726302499183
Episode: 907/1000, score: 74, cumulative reward: -296.199577792316
Episode: 908/1000, score: 128, cumulative reward: -57.5180262633263
Episode: 909/1000, score: 143, cumulative reward: -58.1382796237457
Episode: 910/1000, score: 176, cumulative reward: -95.76965327524793
Episode: 911/1000, score: 121, cumulative reward: -132.2576564810219
Episode: 912/1000, score: 147, cumulative reward: -148.93301475013033
Episode: 913/1000, score: 164, cumulative reward: -43.833440987101724
Episode: 914/1000, score: 132, cumulative reward: -63.811210502096074
Episode: 915/1000, score: 122, cumulative reward: -83.05470319608283
Episode: 916/1000, score: 577, cumulative reward: -232.3813632780429
Episode: 917/1000, score: 110, cumulative reward: 20.931831723149187
Episode: 918/1000, score: 100, cumulative reward: -246.90000169450514
Episode: 919/1000, score: 273, cumulative reward: -66.9941271138515
Episode: 920/1000, score: 144, cumulative reward: -154.60279227074
Episode: 921/1000, score: 314, cumulative reward: -229.04040900239556
Episode: 922/1000, score: 115, cumulative reward: -103.70401575156123
Episode: 923/1000, score: 87, cumulative reward: -268.5605011284547
Episode: 924/1000, score: 77, cumulative reward: -44.64714353108696
Episode: 925/1000, score: 102, cumulative reward: -93.75094267057786
Episode: 926/1000, score: 297, cumulative reward: -247.13163881841808
Episode: 927/1000, score: 242, cumulative reward: -136.01852476094936
Episode: 928/1000, score: 162, cumulative reward: -287.07142504087096
Episode: 929/1000, score: 140, cumulative reward: -15.793925763953766
Episode: 930/1000, score: 172, cumulative reward: 2.689850026764276
Episode: 931/1000, score: 107, cumulative reward: -370.8859211659759
Episode: 932/1000, score: 247, cumulative reward: -104.27312351859088
Episode: 933/1000, score: 104, cumulative reward: -78.29042640402727
Episode: 934/1000, score: 137, cumulative reward: -90.21214255229492
Episode: 935/1000, score: 94, cumulative reward: -118.06844833335111
Episode: 936/1000, score: 102, cumulative reward: -83.92025010631434
Episode: 937/1000, score: 75, cumulative reward: -135.90774815120633
Episode: 938/1000, score: 109, cumulative reward: -169.9036057062228
Episode: 939/1000, score: 129, cumulative reward: 5.919373593310837
Episode: 940/1000, score: 55, cumulative reward: -89.42119648005102
Episode: 941/1000, score: 146, cumulative reward: -133.02945650495406
Episode: 942/1000, score: 157, cumulative reward: -588.7364238643132
Episode: 943/1000, score: 118, cumulative reward: -53.00911289213335
Episode: 944/1000, score: 68, cumulative reward: -226.44552569592355
Episode: 945/1000, score: 76, cumulative reward: -187.36546909001424
Episode: 946/1000, score: 189, cumulative reward: -379.1599007131159
Episode: 947/1000, score: 134, cumulative reward: -98.77661268266847
Episode: 948/1000, score: 61, cumulative reward: -53.017786681990465
Episode: 949/1000, score: 128, cumulative reward: -346.7753973923213
Episode: 950/1000, score: 195, cumulative reward: -216.43327712777165
Episode: 951/1000, score: 112, cumulative reward: 44.81142773691306
Episode: 952/1000, score: 89, cumulative reward: -221.64567432405505
Episode: 953/1000, score: 75, cumulative reward: -167.18056163535687
Episode: 954/1000, score: 68, cumulative reward: -110.09874335146625
Episode: 955/1000, score: 146, cumulative reward: -223.6981887466141
Episode: 956/1000, score: 100, cumulative reward: -138.8580980695694
Episode: 957/1000, score: 128, cumulative reward: -192.5926396001471
Episode: 958/1000, score: 75, cumulative reward: -387.6026951154469
Episode: 959/1000, score: 107, cumulative reward: -45.28564029415847
Episode: 960/1000, score: 52, cumulative reward: -225.2152839850131
Episode: 961/1000, score: 85, cumulative reward: -41.992089952623665
Episode: 962/1000, score: 83, cumulative reward: -294.23685896777215
Episode: 963/1000, score: 143, cumulative reward: -136.2221620400532
Episode: 964/1000, score: 107, cumulative reward: -12.393479642237367
Episode: 965/1000, score: 140, cumulative reward: -15.81049003893591
Episode: 966/1000, score: 112, cumulative reward: -18.420865720877202
Episode: 967/1000, score: 161, cumulative reward: 61.14399333101812
Episode: 968/1000, score: 198, cumulative reward: -86.65082621184715
Episode: 969/1000, score: 308, cumulative reward: -418.94714693035434
Episode: 970/1000, score: 248, cumulative reward: -281.5721848154204
Episode: 971/1000, score: 104, cumulative reward: -197.14413305530442
Episode: 972/1000, score: 249, cumulative reward: 8.46408103707816
Episode: 973/1000, score: 85, cumulative reward: -376.01829963780733
Episode: 974/1000, score: 174, cumulative reward: -645.3830691730637
Episode: 975/1000, score: 155, cumulative reward: -81.80620518178651
Episode: 976/1000, score: 152, cumulative reward: -210.15732693902697
Episode: 977/1000, score: 165, cumulative reward: -78.94307965229473
Episode: 978/1000, score: 142, cumulative reward: -85.2415179248679
Episode: 979/1000, score: 143, cumulative reward: -69.75053456074767
Episode: 980/1000, score: 100, cumulative reward: -55.04015785618233
Episode: 981/1000, score: 200, cumulative reward: -224.49607939654294
Episode: 982/1000, score: 298, cumulative reward: -72.40593661633991
Episode: 983/1000, score: 205, cumulative reward: -36.311958237387984
Episode: 984/1000, score: 142, cumulative reward: -145.97718543681376
Episode: 985/1000, score: 74, cumulative reward: -156.02961272400051
Episode: 986/1000, score: 425, cumulative reward: -389.60631791174205
Episode: 987/1000, score: 87, cumulative reward: -36.51466246698244
Episode: 988/1000, score: 123, cumulative reward: 28.027569576618873
Episode: 989/1000, score: 92, cumulative reward: -58.59939768581047
Episode: 990/1000, score: 75, cumulative reward: -210.87604635700433
Episode: 991/1000, score: 107, cumulative reward: -113.47134686933757
Episode: 992/1000, score: 109, cumulative reward: -21.390104542114983
Episode: 993/1000, score: 89, cumulative reward: -73.28431870845786
Episode: 994/1000, score: 72, cumulative reward: -43.8745540438165
Episode: 995/1000, score: 77, cumulative reward: -115.34466852677866
Episode: 996/1000, score: 271, cumulative reward: -230.54649968045103
Episode: 997/1000, score: 167, cumulative reward: -453.21056752439813
Episode: 998/1000, score: 129, cumulative reward: -161.3377994673669
Episode: 999/1000, score: 245, cumulative reward: -551.512177673166
Episode: 1000/1000, score: 71, cumulative reward: -132.8390202439552
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1001/1000, score: 65, cumulative reward: -626.0924591519772
Episode: 1002/1000, score: 76, cumulative reward: -679.402468102077
Episode: 1003/1000, score: 72, cumulative reward: -675.4306131307861
Episode: 1004/1000, score: 109, cumulative reward: -336.2753546900834
Episode: 1005/1000, score: 133, cumulative reward: -1250.4201338918106
Episode: 1006/1000, score: 63, cumulative reward: -662.1607531904268
Episode: 1007/1000, score: 59, cumulative reward: -618.9443336218878
Episode: 1008/1000, score: 75, cumulative reward: -610.8183830660821
Episode: 1009/1000, score: 63, cumulative reward: -462.75830998046916
Episode: 1010/1000, score: 75, cumulative reward: -692.8926645691298
Episode: 1011/1000, score: 56, cumulative reward: -125.42344691376027
Episode: 1012/1000, score: 58, cumulative reward: -376.03476306117693
Episode: 1013/1000, score: 62, cumulative reward: -329.74125311926485
Episode: 1014/1000, score: 84, cumulative reward: -31.68434942133905
Episode: 1015/1000, score: 72, cumulative reward: -332.58408639199905
Episode: 1016/1000, score: 51, cumulative reward: -113.54871351476949
Episode: 1017/1000, score: 78, cumulative reward: -311.3484712722541
Episode: 1018/1000, score: 83, cumulative reward: -301.51246756627336
Episode: 1019/1000, score: 70, cumulative reward: -451.73101799964934
Episode: 1020/1000, score: 56, cumulative reward: -376.71249552047374
Episode: 1021/1000, score: 50, cumulative reward: -423.3440443504253
Episode: 1022/1000, score: 62, cumulative reward: -442.845336919829
Episode: 1023/1000, score: 95, cumulative reward: -99.22178532336575
Episode: 1024/1000, score: 68, cumulative reward: -423.0693718520698
Episode: 1025/1000, score: 79, cumulative reward: -538.6709897783352
Episode: 1026/1000, score: 62, cumulative reward: -285.0663496177218
Episode: 1027/1000, score: 160, cumulative reward: -947.1596940930756
Episode: 1028/1000, score: 96, cumulative reward: -492.7076516980467
Episode: 1029/1000, score: 58, cumulative reward: -530.7762036911743
Episode: 1030/1000, score: 94, cumulative reward: -494.15377135610424
Episode: 1031/1000, score: 89, cumulative reward: -686.1833814174468
Episode: 1032/1000, score: 126, cumulative reward: -24.429661126392006
Episode: 1033/1000, score: 63, cumulative reward: -159.77073007607717
Episode: 1034/1000, score: 131, cumulative reward: -684.5803535764629
Episode: 1035/1000, score: 81, cumulative reward: -142.92318263345658
Episode: 1036/1000, score: 99, cumulative reward: -544.7907264635276
Episode: 1037/1000, score: 67, cumulative reward: -614.785137062667
Episode: 1038/1000, score: 56, cumulative reward: -89.77081774958721
Episode: 1039/1000, score: 55, cumulative reward: -80.19620934091843
Episode: 1040/1000, score: 100, cumulative reward: -752.6633331530649
Episode: 1041/1000, score: 52, cumulative reward: -196.19552576103183
Episode: 1042/1000, score: 70, cumulative reward: -154.530926082006
Episode: 1043/1000, score: 57, cumulative reward: -39.07586894886563
Episode: 1044/1000, score: 91, cumulative reward: -117.76444553015746
Episode: 1045/1000, score: 86, cumulative reward: -521.1097773116899
Episode: 1046/1000, score: 81, cumulative reward: -299.2704793642269
Episode: 1047/1000, score: 65, cumulative reward: -364.5153128360567
Episode: 1048/1000, score: 78, cumulative reward: -625.1124085187113
Episode: 1049/1000, score: 75, cumulative reward: -554.2292986441211
Episode: 1050/1000, score: 57, cumulative reward: -132.1434877411955
Episode: 1051/1000, score: 70, cumulative reward: -553.0005061475259
Episode: 1052/1000, score: 114, cumulative reward: -479.32262837723124
Episode: 1053/1000, score: 95, cumulative reward: -559.3254927091721
Episode: 1054/1000, score: 107, cumulative reward: -814.8909663160093
Episode: 1055/1000, score: 58, cumulative reward: -494.8847746227594
Episode: 1056/1000, score: 78, cumulative reward: -429.51855886304844
Episode: 1057/1000, score: 78, cumulative reward: -436.8631812111096
Episode: 1058/1000, score: 55, cumulative reward: -211.19983250131207
Episode: 1059/1000, score: 70, cumulative reward: -140.51302555163312
Episode: 1060/1000, score: 69, cumulative reward: -136.1299327714646
Episode: 1061/1000, score: 70, cumulative reward: -201.0985455041406
Episode: 1062/1000, score: 78, cumulative reward: -179.95567987369253
Episode: 1063/1000, score: 57, cumulative reward: -103.4778626260888
Episode: 1064/1000, score: 72, cumulative reward: -702.0131023162705
Episode: 1065/1000, score: 73, cumulative reward: -597.8808937431834
Episode: 1066/1000, score: 122, cumulative reward: -41.56688341113626
Episode: 1067/1000, score: 88, cumulative reward: -654.4006099764184
Episode: 1068/1000, score: 75, cumulative reward: -649.9056946966438
Episode: 1069/1000, score: 78, cumulative reward: -478.5972040212323
Episode: 1070/1000, score: 72, cumulative reward: -344.1503279328065
Episode: 1071/1000, score: 97, cumulative reward: -260.6877924813783
Episode: 1072/1000, score: 53, cumulative reward: -145.79851410187305
Episode: 1073/1000, score: 85, cumulative reward: -193.47063893207303
Episode: 1074/1000, score: 53, cumulative reward: -75.37033576705977
Episode: 1075/1000, score: 103, cumulative reward: -793.5697236586848
Episode: 1076/1000, score: 86, cumulative reward: -230.33597977719828
Episode: 1077/1000, score: 64, cumulative reward: -110.55421537187456
Episode: 1078/1000, score: 81, cumulative reward: -860.1285740942579
Episode: 1079/1000, score: 63, cumulative reward: -683.5789852291402
Episode: 1080/1000, score: 54, cumulative reward: -46.967118979383145
Episode: 1081/1000, score: 70, cumulative reward: -426.15380256683403
Episode: 1082/1000, score: 63, cumulative reward: -329.01296754927046
Episode: 1083/1000, score: 87, cumulative reward: -642.2961182751005
Episode: 1084/1000, score: 70, cumulative reward: -131.63882752419693
Episode: 1085/1000, score: 55, cumulative reward: -134.87099432243423
Episode: 1086/1000, score: 54, cumulative reward: -81.13317636817075
Episode: 1087/1000, score: 77, cumulative reward: -405.10836929755186
Episode: 1088/1000, score: 53, cumulative reward: -140.50518278064354
Episode: 1089/1000, score: 65, cumulative reward: -454.8489825548977
Episode: 1090/1000, score: 103, cumulative reward: 30.550358607838177
Episode: 1091/1000, score: 86, cumulative reward: -111.18089648020526
Episode: 1092/1000, score: 78, cumulative reward: -132.7182059169886
Episode: 1093/1000, score: 70, cumulative reward: -421.9197031957657
Episode: 1094/1000, score: 76, cumulative reward: -504.6923836070877
Episode: 1095/1000, score: 57, cumulative reward: -118.00454320121014
Episode: 1096/1000, score: 57, cumulative reward: -176.73726075223814
Episode: 1097/1000, score: 91, cumulative reward: -67.46530127439536
Episode: 1098/1000, score: 73, cumulative reward: -274.88516015023225
Episode: 1099/1000, score: 76, cumulative reward: -172.88246581920242
Episode: 1100/1000, score: 70, cumulative reward: -89.51851862838892
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1101/1000, score: 68, cumulative reward: -407.78234253015665
Episode: 1102/1000, score: 77, cumulative reward: -422.97746363080097
Episode: 1103/1000, score: 66, cumulative reward: -157.58216177147438
Episode: 1104/1000, score: 60, cumulative reward: -144.85060825917455
Episode: 1105/1000, score: 76, cumulative reward: -586.2997738032989
Episode: 1106/1000, score: 71, cumulative reward: -293.1726951545792
Episode: 1107/1000, score: 61, cumulative reward: -143.51574909605455
Episode: 1108/1000, score: 77, cumulative reward: -162.68902023117886
Episode: 1109/1000, score: 81, cumulative reward: -556.2252509393472
Episode: 1110/1000, score: 51, cumulative reward: -345.10870868482755
Episode: 1111/1000, score: 104, cumulative reward: -288.6771387346489
Episode: 1112/1000, score: 80, cumulative reward: -533.8252322187109
Episode: 1113/1000, score: 59, cumulative reward: -254.24637123147045
Episode: 1114/1000, score: 74, cumulative reward: -298.5369579005793
Episode: 1115/1000, score: 54, cumulative reward: -229.04398858208512
Episode: 1116/1000, score: 67, cumulative reward: -216.0143838945546
Episode: 1117/1000, score: 72, cumulative reward: -267.734187912037
Episode: 1118/1000, score: 58, cumulative reward: -232.29472303249491
Episode: 1119/1000, score: 120, cumulative reward: -210.94787997427215
Episode: 1120/1000, score: 63, cumulative reward: -257.89063014262115
Episode: 1121/1000, score: 107, cumulative reward: -230.4634082430536
Episode: 1122/1000, score: 64, cumulative reward: -164.05845916631802
Episode: 1123/1000, score: 62, cumulative reward: -136.96032796076452
Episode: 1124/1000, score: 60, cumulative reward: -111.72536622823822
Episode: 1125/1000, score: 73, cumulative reward: -482.11446119998953
Episode: 1126/1000, score: 157, cumulative reward: -509.3524052011351
Episode: 1127/1000, score: 74, cumulative reward: -256.9330256461163
Episode: 1128/1000, score: 66, cumulative reward: -115.3288014572056
Episode: 1129/1000, score: 78, cumulative reward: -459.16342200972844
Episode: 1130/1000, score: 78, cumulative reward: -357.1047286580935
Episode: 1131/1000, score: 52, cumulative reward: -204.55136020028843
Episode: 1132/1000, score: 52, cumulative reward: -392.69934693147
Episode: 1133/1000, score: 62, cumulative reward: -424.9953401426804
Episode: 1134/1000, score: 67, cumulative reward: -441.8550089540482
Episode: 1135/1000, score: 87, cumulative reward: -504.17584797681775
Episode: 1136/1000, score: 90, cumulative reward: -708.0533676818603
Episode: 1137/1000, score: 78, cumulative reward: -460.86425367929814
Episode: 1138/1000, score: 79, cumulative reward: -451.16710644795853
Episode: 1139/1000, score: 59, cumulative reward: -486.43306570347465
Episode: 1140/1000, score: 77, cumulative reward: -682.4907650697164
Episode: 1141/1000, score: 59, cumulative reward: -560.765409260011
Episode: 1142/1000, score: 74, cumulative reward: -734.9201513610745
Episode: 1143/1000, score: 55, cumulative reward: -406.798396714691
Episode: 1144/1000, score: 82, cumulative reward: -800.422187651599
Episode: 1145/1000, score: 76, cumulative reward: -406.1955808491082
Episode: 1146/1000, score: 83, cumulative reward: -359.2504652049911
Episode: 1147/1000, score: 55, cumulative reward: -548.696939537057
Episode: 1148/1000, score: 95, cumulative reward: -804.2905438157372
Episode: 1149/1000, score: 113, cumulative reward: -618.5597439770295
Episode: 1150/1000, score: 63, cumulative reward: -83.5260620499209
Episode: 1151/1000, score: 70, cumulative reward: -623.0129956624569
Episode: 1152/1000, score: 78, cumulative reward: -215.35571831723973
Episode: 1153/1000, score: 111, cumulative reward: -274.63874175827084
Episode: 1154/1000, score: 95, cumulative reward: -579.3378221515163
Episode: 1155/1000, score: 95, cumulative reward: -670.7107787596881
Episode: 1156/1000, score: 178, cumulative reward: -800.9275974480862
Episode: 1157/1000, score: 132, cumulative reward: -617.2086660223698
Episode: 1158/1000, score: 104, cumulative reward: -381.2523550674412
Episode: 1159/1000, score: 75, cumulative reward: -133.3742605180729
Episode: 1160/1000, score: 109, cumulative reward: -556.7205869135388
Episode: 1161/1000, score: 102, cumulative reward: -83.81799569263856
Episode: 1162/1000, score: 66, cumulative reward: -86.11261747048513
Episode: 1163/1000, score: 69, cumulative reward: -132.8103307125558
Episode: 1164/1000, score: 72, cumulative reward: -333.59384101535863
Episode: 1165/1000, score: 63, cumulative reward: -248.68544287829124
Episode: 1166/1000, score: 76, cumulative reward: -462.7606212567024
Episode: 1167/1000, score: 120, cumulative reward: -199.64088796112148
Episode: 1168/1000, score: 83, cumulative reward: -80.01933678265361
Episode: 1169/1000, score: 70, cumulative reward: -376.40882533866744
Episode: 1170/1000, score: 92, cumulative reward: -243.95159409255686
Episode: 1171/1000, score: 61, cumulative reward: -124.59750870086877
Episode: 1172/1000, score: 87, cumulative reward: -413.43902079098757
Episode: 1173/1000, score: 99, cumulative reward: -103.92463611682541
Episode: 1174/1000, score: 91, cumulative reward: -225.15027104041351
Episode: 1175/1000, score: 70, cumulative reward: -226.61094911605326
Episode: 1176/1000, score: 120, cumulative reward: -186.96710177733476
Episode: 1177/1000, score: 204, cumulative reward: -229.5534444355794
Episode: 1178/1000, score: 68, cumulative reward: -233.74029754518483
Episode: 1179/1000, score: 96, cumulative reward: -160.7935451719664
Episode: 1180/1000, score: 55, cumulative reward: -376.31150313762134
Episode: 1181/1000, score: 79, cumulative reward: -235.33990732092462
Episode: 1182/1000, score: 85, cumulative reward: -428.3138460449203
Episode: 1183/1000, score: 96, cumulative reward: -167.85526810999667
Episode: 1184/1000, score: 132, cumulative reward: -196.91581178257383
Episode: 1185/1000, score: 123, cumulative reward: -423.8320172787885
Episode: 1186/1000, score: 82, cumulative reward: -130.87586243617795
Episode: 1187/1000, score: 74, cumulative reward: -66.0176816938299
Episode: 1188/1000, score: 54, cumulative reward: -301.8654985151802
Episode: 1189/1000, score: 165, cumulative reward: -336.0370432040552
Episode: 1190/1000, score: 62, cumulative reward: -295.1268136225382
Episode: 1191/1000, score: 69, cumulative reward: 8.525790084286555
Episode: 1192/1000, score: 85, cumulative reward: -114.53610697696
Episode: 1193/1000, score: 94, cumulative reward: -32.31945486149681
Episode: 1194/1000, score: 77, cumulative reward: -443.1228724863728
Episode: 1195/1000, score: 51, cumulative reward: -103.1234008831471
Episode: 1196/1000, score: 60, cumulative reward: -117.84755435266968
Episode: 1197/1000, score: 106, cumulative reward: -128.18115835464204
Episode: 1198/1000, score: 140, cumulative reward: -268.15174126119825
Episode: 1199/1000, score: 87, cumulative reward: -125.5864286203203
Episode: 1200/1000, score: 67, cumulative reward: -161.4922484490892
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1201/1000, score: 103, cumulative reward: -431.85296884695873
Episode: 1202/1000, score: 73, cumulative reward: -168.58034870806586
Episode: 1203/1000, score: 98, cumulative reward: -222.57704098238247
Episode: 1204/1000, score: 124, cumulative reward: -302.8588451161577
Episode: 1205/1000, score: 73, cumulative reward: -242.26630434443626
Episode: 1206/1000, score: 93, cumulative reward: -659.3731955177652
Episode: 1207/1000, score: 65, cumulative reward: -292.31096910809606
Episode: 1208/1000, score: 76, cumulative reward: -258.9697920436654
Episode: 1209/1000, score: 74, cumulative reward: -301.4325307702252
Episode: 1210/1000, score: 96, cumulative reward: -391.79318888760724
Episode: 1211/1000, score: 77, cumulative reward: -115.30172253706066
Episode: 1212/1000, score: 53, cumulative reward: -120.8535003513716
Episode: 1213/1000, score: 58, cumulative reward: -87.9398220170832
Episode: 1214/1000, score: 157, cumulative reward: -100.26491189980666
Episode: 1215/1000, score: 106, cumulative reward: -255.76458851579062
Episode: 1216/1000, score: 105, cumulative reward: -460.6753738072587
Episode: 1217/1000, score: 129, cumulative reward: -419.87262391212863
Episode: 1218/1000, score: 155, cumulative reward: -202.86696133691478
Episode: 1219/1000, score: 258, cumulative reward: -538.4691768951601
Episode: 1220/1000, score: 199, cumulative reward: -54.66288988134803
Episode: 1221/1000, score: 135, cumulative reward: -478.0114348246543
Episode: 1222/1000, score: 59, cumulative reward: -532.1938074187599
Episode: 1223/1000, score: 120, cumulative reward: -76.8480395518839
Episode: 1224/1000, score: 90, cumulative reward: -287.32471550499196
Episode: 1225/1000, score: 93, cumulative reward: -160.46543106899574
Episode: 1226/1000, score: 223, cumulative reward: -278.5315244250459
Episode: 1227/1000, score: 134, cumulative reward: -243.1318263057741
Episode: 1228/1000, score: 84, cumulative reward: -293.70317214420106
Episode: 1229/1000, score: 119, cumulative reward: -114.37215738712919
Episode: 1230/1000, score: 105, cumulative reward: -343.17970999173184
Episode: 1231/1000, score: 85, cumulative reward: -1.360974001987941
Episode: 1232/1000, score: 70, cumulative reward: -271.243467856191
Episode: 1233/1000, score: 83, cumulative reward: -155.87387594314868
Episode: 1234/1000, score: 89, cumulative reward: -131.72051654993356
Episode: 1235/1000, score: 89, cumulative reward: -436.99669008649045
Episode: 1236/1000, score: 93, cumulative reward: -296.1859112936149
Episode: 1237/1000, score: 149, cumulative reward: -626.5749968949337
Episode: 1238/1000, score: 92, cumulative reward: -167.38892976075982
Episode: 1239/1000, score: 88, cumulative reward: -409.119659159398
Episode: 1240/1000, score: 86, cumulative reward: -374.21711140430386
Episode: 1241/1000, score: 92, cumulative reward: -492.426007126085
Episode: 1242/1000, score: 65, cumulative reward: -237.0689418891415
Episode: 1243/1000, score: 70, cumulative reward: -223.0952262507331
Episode: 1244/1000, score: 65, cumulative reward: -290.98282400626385
Episode: 1245/1000, score: 85, cumulative reward: -472.69227097513726
Episode: 1246/1000, score: 63, cumulative reward: -234.43490678542625
Episode: 1247/1000, score: 76, cumulative reward: -167.4759832197852
Episode: 1248/1000, score: 100, cumulative reward: -207.86517055810242
Episode: 1249/1000, score: 94, cumulative reward: -137.9113574001117
Episode: 1250/1000, score: 82, cumulative reward: -344.654156755704
Episode: 1251/1000, score: 209, cumulative reward: -267.2633742545299
Episode: 1252/1000, score: 108, cumulative reward: -385.02802512545924
Episode: 1253/1000, score: 95, cumulative reward: -377.4525304007285
Episode: 1254/1000, score: 104, cumulative reward: -337.95547691729564
Episode: 1255/1000, score: 122, cumulative reward: -226.7011874315431
Episode: 1256/1000, score: 84, cumulative reward: -427.6991448322593
Episode: 1257/1000, score: 96, cumulative reward: -793.1151871583454
Episode: 1258/1000, score: 80, cumulative reward: -123.75786430181996
Episode: 1259/1000, score: 55, cumulative reward: -261.88538226170044
Episode: 1260/1000, score: 66, cumulative reward: -156.59660224799876
Episode: 1261/1000, score: 93, cumulative reward: -316.90570519679886
Episode: 1262/1000, score: 84, cumulative reward: -231.6215570299899
Episode: 1263/1000, score: 113, cumulative reward: -38.60951059564795
Episode: 1264/1000, score: 129, cumulative reward: -431.5522691520889
Episode: 1265/1000, score: 128, cumulative reward: -316.2585426396659
Episode: 1266/1000, score: 82, cumulative reward: -398.62532653089005
Episode: 1267/1000, score: 74, cumulative reward: -159.74653808755272
Episode: 1268/1000, score: 121, cumulative reward: -210.1523034368025
Episode: 1269/1000, score: 110, cumulative reward: -221.55009636456612
Episode: 1270/1000, score: 83, cumulative reward: -434.3017094877149
Episode: 1271/1000, score: 109, cumulative reward: -256.76847980824834
Episode: 1272/1000, score: 146, cumulative reward: -257.53679657282447
Episode: 1273/1000, score: 73, cumulative reward: -325.7107629284176
Episode: 1274/1000, score: 77, cumulative reward: -211.62481453659785
Episode: 1275/1000, score: 66, cumulative reward: -362.99889258007477
Episode: 1276/1000, score: 99, cumulative reward: -424.24668713457623
Episode: 1277/1000, score: 89, cumulative reward: -138.27729070353155
Episode: 1278/1000, score: 70, cumulative reward: -158.06027656044148
Episode: 1279/1000, score: 118, cumulative reward: 30.970793182665034
Episode: 1280/1000, score: 144, cumulative reward: 0.2951786192632966
Episode: 1281/1000, score: 74, cumulative reward: -322.5320297482372
Episode: 1282/1000, score: 97, cumulative reward: -235.99072014573937
Episode: 1283/1000, score: 81, cumulative reward: -464.377060270183
Episode: 1284/1000, score: 95, cumulative reward: -448.0451427846004
Episode: 1285/1000, score: 90, cumulative reward: -485.2958489370149
Episode: 1286/1000, score: 72, cumulative reward: -595.7850270552294
Episode: 1287/1000, score: 72, cumulative reward: -602.178859395085
Episode: 1288/1000, score: 78, cumulative reward: -738.3475228142522
Episode: 1289/1000, score: 77, cumulative reward: -472.00122797103916
Episode: 1290/1000, score: 105, cumulative reward: -1025.3172117859717
Episode: 1291/1000, score: 72, cumulative reward: -131.66542669059436
Episode: 1292/1000, score: 52, cumulative reward: -155.65867256263408
Episode: 1293/1000, score: 116, cumulative reward: -255.9664144262348
Episode: 1294/1000, score: 53, cumulative reward: -211.37228730351183
Episode: 1295/1000, score: 84, cumulative reward: -477.4711385135722
Episode: 1296/1000, score: 65, cumulative reward: -636.3618877173622
Episode: 1297/1000, score: 106, cumulative reward: -115.34414390423308
Episode: 1298/1000, score: 147, cumulative reward: -785.2705033233394
Episode: 1299/1000, score: 81, cumulative reward: -130.81937553060993
Episode: 1300/1000, score: 62, cumulative reward: -475.38657053482143
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1301/1000, score: 73, cumulative reward: -427.9815845675643
Episode: 1302/1000, score: 68, cumulative reward: -314.5688023936302
Episode: 1303/1000, score: 98, cumulative reward: -524.9967165354019
Episode: 1304/1000, score: 159, cumulative reward: -103.58562140428486
Episode: 1305/1000, score: 149, cumulative reward: -282.9116881572983
Episode: 1306/1000, score: 73, cumulative reward: -608.1077867678864
Episode: 1307/1000, score: 52, cumulative reward: -466.1520645852876
Episode: 1308/1000, score: 69, cumulative reward: -142.8191166951218
Episode: 1309/1000, score: 55, cumulative reward: -395.68740004125584
Episode: 1310/1000, score: 71, cumulative reward: -255.78011556830205
Episode: 1311/1000, score: 71, cumulative reward: -10.134625718752588
Episode: 1312/1000, score: 49, cumulative reward: -329.79598468853465
Episode: 1313/1000, score: 118, cumulative reward: -289.04102878469325
Episode: 1314/1000, score: 78, cumulative reward: -143.71242932426372
Episode: 1315/1000, score: 52, cumulative reward: -324.22417469008633
Episode: 1316/1000, score: 66, cumulative reward: -103.23589026067108
Episode: 1317/1000, score: 78, cumulative reward: -375.47621704591734
Episode: 1318/1000, score: 51, cumulative reward: -328.2058631985515
Episode: 1319/1000, score: 68, cumulative reward: -450.6038617678412
Episode: 1320/1000, score: 84, cumulative reward: -60.04507267868726
Episode: 1321/1000, score: 102, cumulative reward: -512.6589949035592
Episode: 1322/1000, score: 142, cumulative reward: -52.79094568185007
Episode: 1323/1000, score: 114, cumulative reward: -140.1855132277734
Episode: 1324/1000, score: 81, cumulative reward: -93.54743170772127
Episode: 1325/1000, score: 52, cumulative reward: -93.0230408514807
Episode: 1326/1000, score: 176, cumulative reward: -210.4086028988624
Episode: 1327/1000, score: 88, cumulative reward: -444.36296445833096
Episode: 1328/1000, score: 136, cumulative reward: -185.23921689749682
Episode: 1329/1000, score: 111, cumulative reward: -653.0467509028708
Episode: 1330/1000, score: 200, cumulative reward: -1154.6493431224762
Episode: 1331/1000, score: 88, cumulative reward: -339.54901752797184
Episode: 1332/1000, score: 65, cumulative reward: -111.57926257891525
Episode: 1333/1000, score: 187, cumulative reward: -249.20206382827695
Episode: 1334/1000, score: 120, cumulative reward: -180.3997849464931
Episode: 1335/1000, score: 361, cumulative reward: -175.0504047954881
Episode: 1336/1000, score: 261, cumulative reward: -180.71407731068933
Episode: 1337/1000, score: 319, cumulative reward: -191.78802967026775
Episode: 1338/1000, score: 228, cumulative reward: -297.3320848477912
Episode: 1339/1000, score: 157, cumulative reward: -9.246768010707001
Episode: 1340/1000, score: 72, cumulative reward: -482.4324495209619
Episode: 1341/1000, score: 63, cumulative reward: -370.317151768838
Episode: 1342/1000, score: 182, cumulative reward: -230.5527095826549
Episode: 1343/1000, score: 74, cumulative reward: -244.4902927064659
Episode: 1344/1000, score: 81, cumulative reward: -94.12839311702618
Episode: 1345/1000, score: 97, cumulative reward: -172.30303865622597
Episode: 1346/1000, score: 95, cumulative reward: -32.23417777314498
Episode: 1347/1000, score: 196, cumulative reward: -164.98837386453766
Episode: 1348/1000, score: 92, cumulative reward: -140.90393053011306
Episode: 1349/1000, score: 91, cumulative reward: -101.88158099554609
Episode: 1350/1000, score: 190, cumulative reward: -240.8873901541873
Episode: 1351/1000, score: 183, cumulative reward: -80.68147961152488
Episode: 1352/1000, score: 104, cumulative reward: -62.979346849946175
Episode: 1353/1000, score: 62, cumulative reward: -265.8411089300764
Episode: 1354/1000, score: 451, cumulative reward: -208.9135029373105
Episode: 1355/1000, score: 97, cumulative reward: -255.54227154826413
Episode: 1356/1000, score: 118, cumulative reward: -136.17199314194875
Episode: 1357/1000, score: 58, cumulative reward: -153.35417419012663
Episode: 1358/1000, score: 72, cumulative reward: -231.0914622845854
Episode: 1359/1000, score: 157, cumulative reward: -255.0516156412081
Episode: 1360/1000, score: 75, cumulative reward: -58.49461683372332
Episode: 1361/1000, score: 61, cumulative reward: -174.04503481002675
Episode: 1362/1000, score: 98, cumulative reward: -493.47425724686684
Episode: 1363/1000, score: 84, cumulative reward: -60.733111038795116
Episode: 1364/1000, score: 77, cumulative reward: -206.90736800424128
Episode: 1365/1000, score: 63, cumulative reward: -358.6000659325823
Episode: 1366/1000, score: 68, cumulative reward: -96.47518424146489
Episode: 1367/1000, score: 64, cumulative reward: -287.6629925114762
Episode: 1368/1000, score: 66, cumulative reward: -272.4517722980372
Episode: 1369/1000, score: 92, cumulative reward: -394.62993417693167
Episode: 1370/1000, score: 87, cumulative reward: -220.57373025666857
Episode: 1371/1000, score: 244, cumulative reward: -406.8786186065922
Episode: 1372/1000, score: 70, cumulative reward: -249.20813691278516
Episode: 1373/1000, score: 148, cumulative reward: -226.37033688489709
Episode: 1374/1000, score: 56, cumulative reward: -75.02849020265323
Episode: 1375/1000, score: 57, cumulative reward: -231.70841096106622
Episode: 1376/1000, score: 78, cumulative reward: -85.41725302329837
Episode: 1377/1000, score: 94, cumulative reward: -268.14811414478487
Episode: 1378/1000, score: 56, cumulative reward: -220.86491568424498
Episode: 1379/1000, score: 68, cumulative reward: -103.13148895087929
Episode: 1380/1000, score: 168, cumulative reward: -147.99914695270763
Episode: 1381/1000, score: 68, cumulative reward: -233.64857773266317
Episode: 1382/1000, score: 233, cumulative reward: -342.7092758812126
Episode: 1383/1000, score: 84, cumulative reward: -243.02539015798453
Episode: 1384/1000, score: 126, cumulative reward: -178.63021469843767
Episode: 1385/1000, score: 98, cumulative reward: -232.1194652363977
Episode: 1386/1000, score: 98, cumulative reward: -100.83786628519906
Episode: 1387/1000, score: 54, cumulative reward: -309.86387373223334
Episode: 1388/1000, score: 149, cumulative reward: -245.4823031015208
Episode: 1389/1000, score: 58, cumulative reward: -323.42141006138144
Episode: 1390/1000, score: 84, cumulative reward: -346.07380444695485
Episode: 1391/1000, score: 87, cumulative reward: -269.4054856501692
Episode: 1392/1000, score: 74, cumulative reward: -105.63675394891072
Episode: 1393/1000, score: 121, cumulative reward: -169.79464025730175
Episode: 1394/1000, score: 110, cumulative reward: -28.97582924568141
Episode: 1395/1000, score: 141, cumulative reward: -473.2324578340928
Episode: 1396/1000, score: 66, cumulative reward: -431.8439015574234
Episode: 1397/1000, score: 68, cumulative reward: -326.3102341229009
Episode: 1398/1000, score: 128, cumulative reward: -213.11781079214143
Episode: 1399/1000, score: 49, cumulative reward: -398.04763983220107
Episode: 1400/1000, score: 133, cumulative reward: -1.8760160196980848
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1401/1000, score: 180, cumulative reward: -238.79913497928044
Episode: 1402/1000, score: 60, cumulative reward: -392.8868790637618
Episode: 1403/1000, score: 80, cumulative reward: -298.27100785949017
Episode: 1404/1000, score: 84, cumulative reward: -426.07134809429783
Episode: 1405/1000, score: 60, cumulative reward: -430.04533316173695
Episode: 1406/1000, score: 132, cumulative reward: 23.665455163620166
Episode: 1407/1000, score: 85, cumulative reward: -51.81636946666232
Episode: 1408/1000, score: 64, cumulative reward: -362.64683337912464
Episode: 1409/1000, score: 79, cumulative reward: -399.47597709147016
Episode: 1410/1000, score: 61, cumulative reward: -305.66879890207474
Episode: 1411/1000, score: 101, cumulative reward: -369.9110550403734
Episode: 1412/1000, score: 77, cumulative reward: -131.1825188831072
Episode: 1413/1000, score: 96, cumulative reward: -262.9157468773543
Episode: 1414/1000, score: 87, cumulative reward: -335.9776424391447
Episode: 1415/1000, score: 73, cumulative reward: -328.83606229920053
Episode: 1416/1000, score: 84, cumulative reward: -567.2837049298105
Episode: 1417/1000, score: 135, cumulative reward: -52.910859296146036
Episode: 1418/1000, score: 75, cumulative reward: -392.0231979188015
Episode: 1419/1000, score: 105, cumulative reward: -255.98399199953045
Episode: 1420/1000, score: 61, cumulative reward: -222.50881075651475
Episode: 1421/1000, score: 70, cumulative reward: -514.2689920579776
Episode: 1422/1000, score: 104, cumulative reward: -84.09390819621755
Episode: 1423/1000, score: 79, cumulative reward: -145.04112820821928
Episode: 1424/1000, score: 171, cumulative reward: -75.61445353833838
Episode: 1425/1000, score: 125, cumulative reward: -79.33237804115328
Episode: 1426/1000, score: 135, cumulative reward: -219.15752871681156
Episode: 1427/1000, score: 68, cumulative reward: -260.08666031136704
Episode: 1428/1000, score: 65, cumulative reward: -286.5275357328518
Episode: 1429/1000, score: 84, cumulative reward: -304.4913103728753
Episode: 1430/1000, score: 80, cumulative reward: -355.87353633955
Episode: 1431/1000, score: 140, cumulative reward: -283.0084664148127
Episode: 1432/1000, score: 137, cumulative reward: -323.5966796458921
Episode: 1433/1000, score: 141, cumulative reward: -196.95732639763253
Episode: 1434/1000, score: 111, cumulative reward: -182.87362993929776
Episode: 1435/1000, score: 95, cumulative reward: -89.63168188961009
Episode: 1436/1000, score: 126, cumulative reward: -130.49021376044436
Episode: 1437/1000, score: 93, cumulative reward: -33.440885276180595
Episode: 1438/1000, score: 60, cumulative reward: -315.7363209676827
Episode: 1439/1000, score: 159, cumulative reward: -175.9471560128417
Episode: 1440/1000, score: 98, cumulative reward: -122.8060459707764
Episode: 1441/1000, score: 59, cumulative reward: -163.2437196200686
Episode: 1442/1000, score: 109, cumulative reward: -103.30858687942114
Episode: 1443/1000, score: 59, cumulative reward: -199.72938092783141
Episode: 1444/1000, score: 76, cumulative reward: -103.65415496350923
Episode: 1445/1000, score: 70, cumulative reward: -269.58258338213375
Episode: 1446/1000, score: 168, cumulative reward: -351.1766430441871
Episode: 1447/1000, score: 117, cumulative reward: -236.3475907865392
Episode: 1448/1000, score: 156, cumulative reward: -44.276392562966436
Episode: 1449/1000, score: 124, cumulative reward: -94.77944508235196
Episode: 1450/1000, score: 84, cumulative reward: -45.32589238112935
Episode: 1451/1000, score: 82, cumulative reward: -164.32919092749592
Episode: 1452/1000, score: 121, cumulative reward: -446.4718217507021
Episode: 1453/1000, score: 100, cumulative reward: -245.09162496358104
Episode: 1454/1000, score: 161, cumulative reward: -89.76019036891685
Episode: 1455/1000, score: 75, cumulative reward: -317.7905385704545
Episode: 1456/1000, score: 125, cumulative reward: -208.94602460724886
Episode: 1457/1000, score: 93, cumulative reward: -170.9701562927288
Episode: 1458/1000, score: 116, cumulative reward: -144.40167121749556
Episode: 1459/1000, score: 108, cumulative reward: -388.4687145793253
Episode: 1460/1000, score: 75, cumulative reward: -99.91056841673353
Episode: 1461/1000, score: 63, cumulative reward: -216.1317064098566
Episode: 1462/1000, score: 189, cumulative reward: -185.219750891314
Episode: 1463/1000, score: 146, cumulative reward: -151.77772450212552
Episode: 1464/1000, score: 79, cumulative reward: -195.36663463930591
Episode: 1465/1000, score: 71, cumulative reward: -264.40076060143247
Episode: 1466/1000, score: 92, cumulative reward: -329.3123808080489
Episode: 1467/1000, score: 82, cumulative reward: -41.69206183402966
Episode: 1468/1000, score: 119, cumulative reward: -324.1513235215075
Episode: 1469/1000, score: 59, cumulative reward: -307.9479606260458
Episode: 1470/1000, score: 146, cumulative reward: -169.68487519431224
Episode: 1471/1000, score: 101, cumulative reward: -142.8046471609964
Episode: 1472/1000, score: 92, cumulative reward: -123.43801548169154
Episode: 1473/1000, score: 94, cumulative reward: -466.048168457068
Episode: 1474/1000, score: 85, cumulative reward: -191.8214249297962
Episode: 1475/1000, score: 98, cumulative reward: -97.7520221111893
Episode: 1476/1000, score: 106, cumulative reward: -292.63882152056135
Episode: 1477/1000, score: 93, cumulative reward: 19.629459690526616
Episode: 1478/1000, score: 108, cumulative reward: -112.90550431688047
Episode: 1479/1000, score: 70, cumulative reward: -435.1928406465877
Episode: 1480/1000, score: 79, cumulative reward: -247.67510584877027
Episode: 1481/1000, score: 83, cumulative reward: -39.925791882025045
Episode: 1482/1000, score: 107, cumulative reward: -75.01260080286549
Episode: 1483/1000, score: 59, cumulative reward: -132.50712791474933
Episode: 1484/1000, score: 73, cumulative reward: -246.84399209193344
Episode: 1485/1000, score: 115, cumulative reward: -96.34231788158402
Episode: 1486/1000, score: 78, cumulative reward: -265.01050897893765
Episode: 1487/1000, score: 68, cumulative reward: -185.64935503687462
Episode: 1488/1000, score: 82, cumulative reward: -301.6969958785088
Episode: 1489/1000, score: 106, cumulative reward: -272.71235709224425
Episode: 1490/1000, score: 65, cumulative reward: -107.37130794325621
Episode: 1491/1000, score: 88, cumulative reward: -143.20106540501126
Episode: 1492/1000, score: 88, cumulative reward: -230.882633147767
Episode: 1493/1000, score: 69, cumulative reward: -284.40907116755136
Episode: 1494/1000, score: 104, cumulative reward: -162.1942885367776
Episode: 1495/1000, score: 83, cumulative reward: -228.80956726132945
Episode: 1496/1000, score: 68, cumulative reward: -290.7348538845604
Episode: 1497/1000, score: 85, cumulative reward: -337.24497493395677
Episode: 1498/1000, score: 93, cumulative reward: -182.90120681798695
Episode: 1499/1000, score: 65, cumulative reward: -269.99996170938175
Episode: 1500/1000, score: 128, cumulative reward: -324.99170442752325
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1501/1000, score: 78, cumulative reward: -221.49117189863733
Episode: 1502/1000, score: 72, cumulative reward: -98.09178498758166
Episode: 1503/1000, score: 66, cumulative reward: -30.137845394786382
Episode: 1504/1000, score: 53, cumulative reward: -167.02925974206937
Episode: 1505/1000, score: 100, cumulative reward: -106.67159488052657
Episode: 1506/1000, score: 65, cumulative reward: -203.71409988316083
Episode: 1507/1000, score: 117, cumulative reward: -52.67063527705515
Episode: 1508/1000, score: 92, cumulative reward: -283.94529760975706
Episode: 1509/1000, score: 82, cumulative reward: -152.48699249549009
Episode: 1510/1000, score: 120, cumulative reward: -142.29443815290685
Episode: 1511/1000, score: 88, cumulative reward: -128.9594070771804
Episode: 1512/1000, score: 72, cumulative reward: -71.53681686759953
Episode: 1513/1000, score: 108, cumulative reward: -9.292418801454701
Episode: 1514/1000, score: 105, cumulative reward: -229.22872565026765
Episode: 1515/1000, score: 68, cumulative reward: -91.6794564816652
Episode: 1516/1000, score: 70, cumulative reward: -137.9251900340035
Episode: 1517/1000, score: 103, cumulative reward: -42.10323594785698
Episode: 1518/1000, score: 84, cumulative reward: -183.15836984655715
Episode: 1519/1000, score: 98, cumulative reward: -141.8860843820474
Episode: 1520/1000, score: 66, cumulative reward: -313.99411408635206
Episode: 1521/1000, score: 71, cumulative reward: -485.29947716488357
Episode: 1522/1000, score: 77, cumulative reward: -110.90362076772581
Episode: 1523/1000, score: 98, cumulative reward: -84.36793728196825
Episode: 1524/1000, score: 67, cumulative reward: -72.36406158138652
Episode: 1525/1000, score: 194, cumulative reward: -219.7132403955073
Episode: 1526/1000, score: 70, cumulative reward: -214.64561741018218
Episode: 1527/1000, score: 104, cumulative reward: -186.314716075181
Episode: 1528/1000, score: 183, cumulative reward: -198.2313169038197
Episode: 1529/1000, score: 84, cumulative reward: -71.62629969715667
Episode: 1530/1000, score: 57, cumulative reward: -428.02191449848647
Episode: 1531/1000, score: 177, cumulative reward: -70.24808925642112
Episode: 1532/1000, score: 95, cumulative reward: -113.41631959275513
Episode: 1533/1000, score: 55, cumulative reward: -248.21667572154072
Episode: 1534/1000, score: 257, cumulative reward: -151.12262236763928
Episode: 1535/1000, score: 83, cumulative reward: -232.14557967987298
Episode: 1536/1000, score: 126, cumulative reward: -404.0504172526638
Episode: 1537/1000, score: 157, cumulative reward: -208.43143263865548
Episode: 1538/1000, score: 64, cumulative reward: -122.09804779815359
Episode: 1539/1000, score: 128, cumulative reward: -176.21853696448665
Episode: 1540/1000, score: 109, cumulative reward: -153.0790569351165
Episode: 1541/1000, score: 93, cumulative reward: -241.34742259699925
Episode: 1542/1000, score: 142, cumulative reward: -7.977735575511645
Episode: 1543/1000, score: 78, cumulative reward: -48.53270459103301
Episode: 1544/1000, score: 205, cumulative reward: -365.3623395213114
Episode: 1545/1000, score: 49, cumulative reward: -327.9406901492398
Episode: 1546/1000, score: 69, cumulative reward: -201.80229734847876
Episode: 1547/1000, score: 67, cumulative reward: -223.85870351956646
Episode: 1548/1000, score: 73, cumulative reward: -298.3027557958492
Episode: 1549/1000, score: 77, cumulative reward: -192.802082591089
Episode: 1550/1000, score: 57, cumulative reward: -450.48700115444626
Episode: 1551/1000, score: 90, cumulative reward: -380.22278397329336
Episode: 1552/1000, score: 146, cumulative reward: -839.7780163231807
Episode: 1553/1000, score: 119, cumulative reward: -200.25003992064978
Episode: 1554/1000, score: 72, cumulative reward: -495.4405662038352
Episode: 1555/1000, score: 87, cumulative reward: -207.2613290214138
Episode: 1556/1000, score: 89, cumulative reward: -331.2837862066448
Episode: 1557/1000, score: 122, cumulative reward: -368.2025885045687
Episode: 1558/1000, score: 114, cumulative reward: -209.56145366701344
Episode: 1559/1000, score: 113, cumulative reward: -94.4321731462144
Episode: 1560/1000, score: 114, cumulative reward: -408.2932883257786
Episode: 1561/1000, score: 148, cumulative reward: -188.7753883065348
Episode: 1562/1000, score: 73, cumulative reward: -478.44838016219575
Episode: 1563/1000, score: 91, cumulative reward: -276.598843028364
Episode: 1564/1000, score: 59, cumulative reward: -372.75510321801795
Episode: 1565/1000, score: 80, cumulative reward: -346.62379677311577
Episode: 1566/1000, score: 96, cumulative reward: -148.7093749201441
Episode: 1567/1000, score: 101, cumulative reward: -327.1358744041406
Episode: 1568/1000, score: 174, cumulative reward: -206.32100192702686
Episode: 1569/1000, score: 142, cumulative reward: -304.8506467842776
Episode: 1570/1000, score: 71, cumulative reward: -105.02004697287144
Episode: 1571/1000, score: 59, cumulative reward: -300.05430304826257
Episode: 1572/1000, score: 112, cumulative reward: -528.7976669412571
Episode: 1573/1000, score: 86, cumulative reward: -214.9242115647226
Episode: 1574/1000, score: 65, cumulative reward: -148.32964338967133
Episode: 1575/1000, score: 96, cumulative reward: -217.0021261815183
Episode: 1576/1000, score: 91, cumulative reward: -189.2696041454856
Episode: 1577/1000, score: 58, cumulative reward: -272.5439365353766
Episode: 1578/1000, score: 108, cumulative reward: -137.97286472553952
Episode: 1579/1000, score: 58, cumulative reward: -117.45237255407724
Episode: 1580/1000, score: 91, cumulative reward: -328.8013404368576
Episode: 1581/1000, score: 103, cumulative reward: -250.02390507079966
Episode: 1582/1000, score: 146, cumulative reward: -165.88380471688825
Episode: 1583/1000, score: 108, cumulative reward: -249.9075436593043
Episode: 1584/1000, score: 84, cumulative reward: -288.695404749885
Episode: 1585/1000, score: 71, cumulative reward: -146.49774056350552
Episode: 1586/1000, score: 84, cumulative reward: -150.5504253260861
Episode: 1587/1000, score: 100, cumulative reward: -178.31633994044978
Episode: 1588/1000, score: 97, cumulative reward: -109.65310741724488
Episode: 1589/1000, score: 50, cumulative reward: -424.59392062550313
Episode: 1590/1000, score: 123, cumulative reward: -365.0751503948686
Episode: 1591/1000, score: 104, cumulative reward: -108.73190349326802
Episode: 1592/1000, score: 106, cumulative reward: -160.45667704699986
Episode: 1593/1000, score: 80, cumulative reward: -132.77000725861808
Episode: 1594/1000, score: 124, cumulative reward: -242.34941548965054
Episode: 1595/1000, score: 126, cumulative reward: -377.26849079975136
Episode: 1596/1000, score: 140, cumulative reward: -2.745853024644248
Episode: 1597/1000, score: 94, cumulative reward: -247.462680080721
Episode: 1598/1000, score: 124, cumulative reward: -124.4026171451154
Episode: 1599/1000, score: 117, cumulative reward: -435.7136488209207
Episode: 1600/1000, score: 123, cumulative reward: -135.38237437154487
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1601/1000, score: 130, cumulative reward: -205.639927846549
Episode: 1602/1000, score: 246, cumulative reward: -754.445549829317
Episode: 1603/1000, score: 192, cumulative reward: -175.6632367044818
Episode: 1604/1000, score: 114, cumulative reward: -132.03149983638684
Episode: 1605/1000, score: 69, cumulative reward: -195.14365151695122
Episode: 1606/1000, score: 68, cumulative reward: -155.82437845310395
Episode: 1607/1000, score: 107, cumulative reward: -190.2568978429998
Episode: 1608/1000, score: 66, cumulative reward: -501.81662366736595
Episode: 1609/1000, score: 149, cumulative reward: -341.40698713115734
Episode: 1610/1000, score: 127, cumulative reward: -177.78124128372548
Episode: 1611/1000, score: 122, cumulative reward: -482.1825054171856
Episode: 1612/1000, score: 101, cumulative reward: -322.1309124239514
Episode: 1613/1000, score: 118, cumulative reward: -169.1465335987138
Episode: 1614/1000, score: 328, cumulative reward: -233.61280507112988
Episode: 1615/1000, score: 110, cumulative reward: -903.3762640612963
Episode: 1616/1000, score: 140, cumulative reward: -480.6676422770603
Episode: 1617/1000, score: 76, cumulative reward: -207.62931995273826
Episode: 1618/1000, score: 124, cumulative reward: -1.6020678460057098
Episode: 1619/1000, score: 97, cumulative reward: -167.25965187950544
Episode: 1620/1000, score: 172, cumulative reward: -164.5478037293922
Episode: 1621/1000, score: 59, cumulative reward: -106.00234162569429
Episode: 1622/1000, score: 111, cumulative reward: -129.23020866171387
Episode: 1623/1000, score: 106, cumulative reward: -160.7460995962556
Episode: 1624/1000, score: 124, cumulative reward: -155.07642859957767
Episode: 1625/1000, score: 191, cumulative reward: -99.39757317849578
Episode: 1626/1000, score: 100, cumulative reward: -97.10129923790377
Episode: 1627/1000, score: 106, cumulative reward: -182.15453648496447
Episode: 1628/1000, score: 139, cumulative reward: -213.61260850240802
Episode: 1629/1000, score: 113, cumulative reward: -104.63100595036346
Episode: 1630/1000, score: 87, cumulative reward: -97.27474506310945
Episode: 1631/1000, score: 132, cumulative reward: -263.2188005452887
Episode: 1632/1000, score: 52, cumulative reward: -218.39934246730814
Episode: 1633/1000, score: 111, cumulative reward: -8.620134914243181
Episode: 1634/1000, score: 103, cumulative reward: -200.056674017197
Episode: 1635/1000, score: 93, cumulative reward: -152.99432180533876
Episode: 1636/1000, score: 202, cumulative reward: 12.64871140211703
Episode: 1637/1000, score: 122, cumulative reward: -258.03650787474317
Episode: 1638/1000, score: 91, cumulative reward: -210.95800401647347
Episode: 1639/1000, score: 217, cumulative reward: -281.9851928412763
Episode: 1640/1000, score: 124, cumulative reward: -50.09228691973673
Episode: 1641/1000, score: 124, cumulative reward: -10.461138380754903
Episode: 1642/1000, score: 125, cumulative reward: -95.51753995596913
Episode: 1643/1000, score: 126, cumulative reward: -84.32585480017643
Episode: 1644/1000, score: 106, cumulative reward: -169.69456452359856
Episode: 1645/1000, score: 88, cumulative reward: -159.65688065750635
Episode: 1646/1000, score: 89, cumulative reward: -57.92340287533505
Episode: 1647/1000, score: 161, cumulative reward: -293.44878651526574
Episode: 1648/1000, score: 138, cumulative reward: -390.25411372564054
Episode: 1649/1000, score: 195, cumulative reward: -100.08542025756657
Episode: 1650/1000, score: 139, cumulative reward: -240.18135028961257
Episode: 1651/1000, score: 135, cumulative reward: -396.93822681038256
Episode: 1652/1000, score: 91, cumulative reward: -151.30942733021294
Episode: 1653/1000, score: 96, cumulative reward: -128.2850200686293
Episode: 1654/1000, score: 394, cumulative reward: -251.8764524226702
Episode: 1655/1000, score: 163, cumulative reward: -259.02875717132713
Episode: 1656/1000, score: 205, cumulative reward: -211.86924251574806
Episode: 1657/1000, score: 291, cumulative reward: -285.7165104089531
Episode: 1658/1000, score: 140, cumulative reward: -281.36497602360953
Episode: 1659/1000, score: 277, cumulative reward: -144.2789132037987
Episode: 1660/1000, score: 87, cumulative reward: -155.03016164596505
Episode: 1661/1000, score: 131, cumulative reward: -431.4614458403947
Episode: 1662/1000, score: 339, cumulative reward: -231.7677271696528
Episode: 1663/1000, score: 177, cumulative reward: -29.572494244196278
Episode: 1664/1000, score: 187, cumulative reward: -349.08741856750555
Episode: 1665/1000, score: 178, cumulative reward: -168.65146347952333
Episode: 1666/1000, score: 172, cumulative reward: -548.1773767916561
Episode: 1667/1000, score: 171, cumulative reward: -335.5759248089632
Episode: 1668/1000, score: 104, cumulative reward: -229.35811364132948
Episode: 1669/1000, score: 106, cumulative reward: -188.30355602322544
Episode: 1670/1000, score: 122, cumulative reward: -230.92813285465152
Episode: 1671/1000, score: 67, cumulative reward: -257.3325382900173
Episode: 1672/1000, score: 97, cumulative reward: -236.42081711598487
Episode: 1673/1000, score: 96, cumulative reward: -184.58392892105007
Episode: 1674/1000, score: 172, cumulative reward: -67.53780075360984
Episode: 1675/1000, score: 109, cumulative reward: -188.2946778591485
Episode: 1676/1000, score: 194, cumulative reward: -451.9852175793386
Episode: 1677/1000, score: 89, cumulative reward: -104.31157917216109
Episode: 1678/1000, score: 116, cumulative reward: -278.896467542011
Episode: 1679/1000, score: 121, cumulative reward: -299.38618788641196
Episode: 1680/1000, score: 98, cumulative reward: -379.8076853058884
Episode: 1681/1000, score: 93, cumulative reward: -161.95659970622214
Episode: 1682/1000, score: 106, cumulative reward: -125.92142951959495
Episode: 1683/1000, score: 126, cumulative reward: -223.29981694721067
Episode: 1684/1000, score: 171, cumulative reward: -430.47190353208725
Episode: 1685/1000, score: 94, cumulative reward: -218.67202080954445
Episode: 1686/1000, score: 54, cumulative reward: -373.14586682030006
Episode: 1687/1000, score: 96, cumulative reward: -448.231471462072
Episode: 1688/1000, score: 78, cumulative reward: -77.37681688398591
Episode: 1689/1000, score: 79, cumulative reward: -423.5267244045627
Episode: 1690/1000, score: 75, cumulative reward: -418.1090553928477
Episode: 1691/1000, score: 108, cumulative reward: -165.54957780698402
Episode: 1692/1000, score: 80, cumulative reward: -97.91899827894062
Episode: 1693/1000, score: 136, cumulative reward: -121.39642312738478
Episode: 1694/1000, score: 105, cumulative reward: -157.4088776439366
Episode: 1695/1000, score: 61, cumulative reward: -307.89750593364533
Episode: 1696/1000, score: 114, cumulative reward: -740.8664030914998
Episode: 1697/1000, score: 60, cumulative reward: -43.00327388822698
Episode: 1698/1000, score: 53, cumulative reward: -297.6123283927096
Episode: 1699/1000, score: 90, cumulative reward: -503.8308009783558
Episode: 1700/1000, score: 121, cumulative reward: -169.10383606115272
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1701/1000, score: 227, cumulative reward: -612.0600915401485
Episode: 1702/1000, score: 82, cumulative reward: -175.75879087345766
Episode: 1703/1000, score: 53, cumulative reward: -129.17155108942956
Episode: 1704/1000, score: 61, cumulative reward: -117.01328139994033
Episode: 1705/1000, score: 51, cumulative reward: -323.5375324101459
Episode: 1706/1000, score: 62, cumulative reward: -107.03464433902747
Episode: 1707/1000, score: 65, cumulative reward: -181.4372576138771
Episode: 1708/1000, score: 125, cumulative reward: -110.65064205738875
Episode: 1709/1000, score: 96, cumulative reward: -303.4899308201848
Episode: 1710/1000, score: 70, cumulative reward: -381.08944272666065
Episode: 1711/1000, score: 74, cumulative reward: -395.03200206987526
Episode: 1712/1000, score: 54, cumulative reward: -377.3820363629243
Episode: 1713/1000, score: 82, cumulative reward: -153.58706148532596
Episode: 1714/1000, score: 70, cumulative reward: -148.06328285230904
Episode: 1715/1000, score: 71, cumulative reward: -160.84227998514643
Episode: 1716/1000, score: 92, cumulative reward: -179.85356791792287
Episode: 1717/1000, score: 85, cumulative reward: -204.4331220396187
Episode: 1718/1000, score: 146, cumulative reward: -42.877379152330256
Episode: 1719/1000, score: 116, cumulative reward: -290.39441746008424
Episode: 1720/1000, score: 110, cumulative reward: -217.43917755468138
Episode: 1721/1000, score: 67, cumulative reward: -122.14938198195455
Episode: 1722/1000, score: 146, cumulative reward: -313.00252316670174
Episode: 1723/1000, score: 140, cumulative reward: -381.2611993911399
Episode: 1724/1000, score: 155, cumulative reward: -213.7848098202257
Episode: 1725/1000, score: 61, cumulative reward: -159.83515120921524
Episode: 1726/1000, score: 121, cumulative reward: -216.34534302098194
Episode: 1727/1000, score: 114, cumulative reward: -113.52781694763796
Episode: 1728/1000, score: 94, cumulative reward: -458.99610428243176
Episode: 1729/1000, score: 158, cumulative reward: -220.47323359732042
Episode: 1730/1000, score: 111, cumulative reward: -409.4436728557765
Episode: 1731/1000, score: 59, cumulative reward: -183.90225046403998
Episode: 1732/1000, score: 97, cumulative reward: -146.56698592027928
Episode: 1733/1000, score: 61, cumulative reward: -139.9465406792967
Episode: 1734/1000, score: 91, cumulative reward: -101.5937086317557
Episode: 1735/1000, score: 76, cumulative reward: -166.4972862747631
Episode: 1736/1000, score: 70, cumulative reward: -106.18007173751727
Episode: 1737/1000, score: 53, cumulative reward: -124.5422770642451
Episode: 1738/1000, score: 90, cumulative reward: -162.40413827636422
Episode: 1739/1000, score: 76, cumulative reward: -156.53089834742997
Episode: 1740/1000, score: 87, cumulative reward: -167.16834325643316
Episode: 1741/1000, score: 84, cumulative reward: -149.03068883797107
Episode: 1742/1000, score: 84, cumulative reward: -185.1519853631658
Episode: 1743/1000, score: 53, cumulative reward: -149.04863391157193
Episode: 1744/1000, score: 75, cumulative reward: -112.30096172638514
Episode: 1745/1000, score: 90, cumulative reward: -357.96422181554806
Episode: 1746/1000, score: 100, cumulative reward: -343.76088814653417
Episode: 1747/1000, score: 104, cumulative reward: -276.1837991308577
Episode: 1748/1000, score: 71, cumulative reward: -351.97744865932776
Episode: 1749/1000, score: 74, cumulative reward: -161.7724391253479
Episode: 1750/1000, score: 80, cumulative reward: -193.19390423714134
Episode: 1751/1000, score: 70, cumulative reward: -334.8828439693776
Episode: 1752/1000, score: 55, cumulative reward: -325.54719962616014
Episode: 1753/1000, score: 59, cumulative reward: -327.0551053895524
Episode: 1754/1000, score: 91, cumulative reward: -140.33505055433574
Episode: 1755/1000, score: 89, cumulative reward: -190.2362566709562
Episode: 1756/1000, score: 67, cumulative reward: -218.87439700725756
Episode: 1757/1000, score: 69, cumulative reward: -315.0084243653895
Episode: 1758/1000, score: 102, cumulative reward: -161.42556183074274
Episode: 1759/1000, score: 58, cumulative reward: -118.58159936386201
Episode: 1760/1000, score: 86, cumulative reward: -409.9885012175319
Episode: 1761/1000, score: 109, cumulative reward: -367.71396852547616
Episode: 1762/1000, score: 128, cumulative reward: -322.37070939566036
Episode: 1763/1000, score: 166, cumulative reward: -141.5399714831943
Episode: 1764/1000, score: 135, cumulative reward: -126.51176163004229
Episode: 1765/1000, score: 92, cumulative reward: -112.56857196941453
Episode: 1766/1000, score: 62, cumulative reward: -127.08875619061818
Episode: 1767/1000, score: 130, cumulative reward: -319.6462201489356
Episode: 1768/1000, score: 62, cumulative reward: -243.858044686269
Episode: 1769/1000, score: 98, cumulative reward: -530.0127315738969
Episode: 1770/1000, score: 58, cumulative reward: -272.90891177809476
Episode: 1771/1000, score: 51, cumulative reward: -144.72866124559437
Episode: 1772/1000, score: 59, cumulative reward: -112.08918397437151
Episode: 1773/1000, score: 78, cumulative reward: -118.3678053120083
Episode: 1774/1000, score: 80, cumulative reward: -95.09198187020047
Episode: 1775/1000, score: 52, cumulative reward: -109.8118109584633
Episode: 1776/1000, score: 103, cumulative reward: -390.68195699385353
Episode: 1777/1000, score: 100, cumulative reward: -147.81414080940633
Episode: 1778/1000, score: 79, cumulative reward: -179.87640496904262
Episode: 1779/1000, score: 195, cumulative reward: -328.7331580799063
Episode: 1780/1000, score: 73, cumulative reward: -173.5926927301814
Episode: 1781/1000, score: 127, cumulative reward: -568.0650870019139
Episode: 1782/1000, score: 95, cumulative reward: -97.91648596620215
Episode: 1783/1000, score: 96, cumulative reward: -581.8849872501752
Episode: 1784/1000, score: 113, cumulative reward: -400.7903510985265
Episode: 1785/1000, score: 71, cumulative reward: -108.87289274048744
Episode: 1786/1000, score: 86, cumulative reward: -166.26760012642885
Episode: 1787/1000, score: 63, cumulative reward: -483.1047468237574
Episode: 1788/1000, score: 86, cumulative reward: -153.9552991570446
Episode: 1789/1000, score: 69, cumulative reward: -156.10347807394237
Episode: 1790/1000, score: 60, cumulative reward: -178.01493855182565
Episode: 1791/1000, score: 53, cumulative reward: -36.53922561184345
Episode: 1792/1000, score: 75, cumulative reward: -188.65385605304775
Episode: 1793/1000, score: 84, cumulative reward: -327.1127028343865
Episode: 1794/1000, score: 157, cumulative reward: -144.08135965839114
Episode: 1795/1000, score: 195, cumulative reward: 42.49427021514734
Episode: 1796/1000, score: 156, cumulative reward: -175.73367724370218
Episode: 1797/1000, score: 79, cumulative reward: -146.05927080381613
Episode: 1798/1000, score: 57, cumulative reward: -340.3107073344451
Episode: 1799/1000, score: 78, cumulative reward: -148.1350231568068
Episode: 1800/1000, score: 59, cumulative reward: -115.20204110339778
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1801/1000, score: 113, cumulative reward: -240.8842304406977
Episode: 1802/1000, score: 60, cumulative reward: -128.70094048453296
Episode: 1803/1000, score: 73, cumulative reward: -204.41558264648728
Episode: 1804/1000, score: 74, cumulative reward: -348.63033693629114
Episode: 1805/1000, score: 88, cumulative reward: -70.65911545629342
Episode: 1806/1000, score: 73, cumulative reward: -136.07432755903483
Episode: 1807/1000, score: 74, cumulative reward: -438.7459614997554
Episode: 1808/1000, score: 333, cumulative reward: -227.39260760487062
Episode: 1809/1000, score: 124, cumulative reward: -90.72411845261675
Episode: 1810/1000, score: 82, cumulative reward: -129.68287742770005
Episode: 1811/1000, score: 70, cumulative reward: -82.88592979217236
Episode: 1812/1000, score: 124, cumulative reward: -301.8015719698593
Episode: 1813/1000, score: 106, cumulative reward: -189.93510217423875
Episode: 1814/1000, score: 79, cumulative reward: -129.36024817598644
Episode: 1815/1000, score: 194, cumulative reward: -247.1026368307612
Episode: 1816/1000, score: 76, cumulative reward: -227.39339677482448
Episode: 1817/1000, score: 67, cumulative reward: -345.92158811509927
Episode: 1818/1000, score: 65, cumulative reward: -338.7966912297544
Episode: 1819/1000, score: 76, cumulative reward: -101.78893530474207
Episode: 1820/1000, score: 90, cumulative reward: -56.64866578842774
Episode: 1821/1000, score: 68, cumulative reward: -226.34700754885296
Episode: 1822/1000, score: 68, cumulative reward: -344.8428724816913
Episode: 1823/1000, score: 66, cumulative reward: -199.39661814478728
Episode: 1824/1000, score: 62, cumulative reward: -47.91913607130205
Episode: 1825/1000, score: 189, cumulative reward: -293.3494464433313
Episode: 1826/1000, score: 99, cumulative reward: -292.3702898911739
Episode: 1827/1000, score: 65, cumulative reward: -454.7677833371489
Episode: 1828/1000, score: 99, cumulative reward: -363.75573923518436
Episode: 1829/1000, score: 61, cumulative reward: -438.9684896594668
Episode: 1830/1000, score: 68, cumulative reward: -472.8352648880907
Episode: 1831/1000, score: 60, cumulative reward: -336.7612044401572
Episode: 1832/1000, score: 64, cumulative reward: -450.8146432403085
Episode: 1833/1000, score: 176, cumulative reward: -55.221842507455364
Episode: 1834/1000, score: 82, cumulative reward: -877.3700482917219
Episode: 1835/1000, score: 119, cumulative reward: -118.03727353946563
Episode: 1836/1000, score: 104, cumulative reward: -266.25215236887607
Episode: 1837/1000, score: 176, cumulative reward: -565.8844503624063
Episode: 1838/1000, score: 77, cumulative reward: -349.3929255476594
Episode: 1839/1000, score: 97, cumulative reward: -201.98976506675032
Episode: 1840/1000, score: 149, cumulative reward: -335.2168906707812
Episode: 1841/1000, score: 105, cumulative reward: -831.949164813318
Episode: 1842/1000, score: 83, cumulative reward: 16.280646553704912
Episode: 1843/1000, score: 94, cumulative reward: -241.17280614967572
Episode: 1844/1000, score: 73, cumulative reward: -244.40579540317796
Episode: 1845/1000, score: 87, cumulative reward: -356.8843009629959
Episode: 1846/1000, score: 122, cumulative reward: -240.19615294703323
Episode: 1847/1000, score: 65, cumulative reward: -121.22112897275922
Episode: 1848/1000, score: 119, cumulative reward: -330.0804927320039
Episode: 1849/1000, score: 111, cumulative reward: -195.5282911640558
Episode: 1850/1000, score: 95, cumulative reward: 25.661313442423975
Episode: 1851/1000, score: 156, cumulative reward: -216.58870583718368
Episode: 1852/1000, score: 109, cumulative reward: -286.166164904731
Episode: 1853/1000, score: 63, cumulative reward: -314.03730575671034
Episode: 1854/1000, score: 119, cumulative reward: -368.32523325235127
Episode: 1855/1000, score: 105, cumulative reward: 25.602967199451655
Episode: 1856/1000, score: 68, cumulative reward: -369.23416392321576
Episode: 1857/1000, score: 61, cumulative reward: -187.48409534931432
Episode: 1858/1000, score: 86, cumulative reward: -159.94229206195672
Episode: 1859/1000, score: 72, cumulative reward: -348.68157639348465
Episode: 1860/1000, score: 100, cumulative reward: 16.760644663755016
Episode: 1861/1000, score: 74, cumulative reward: -313.08418634519836
Episode: 1862/1000, score: 80, cumulative reward: -422.5121014623116
Episode: 1863/1000, score: 126, cumulative reward: -143.9474342693395
Episode: 1864/1000, score: 61, cumulative reward: -239.9011126093542
Episode: 1865/1000, score: 103, cumulative reward: -169.73358665477008
Episode: 1866/1000, score: 75, cumulative reward: -188.27488329741112
Episode: 1867/1000, score: 87, cumulative reward: -213.11466966033353
Episode: 1868/1000, score: 164, cumulative reward: -173.00814838318482
Episode: 1869/1000, score: 109, cumulative reward: -206.5545702239461
Episode: 1870/1000, score: 82, cumulative reward: -133.61322866558254
Episode: 1871/1000, score: 109, cumulative reward: -225.67896640304167
Episode: 1872/1000, score: 107, cumulative reward: -128.900170382292
Episode: 1873/1000, score: 99, cumulative reward: -51.627340349916665
Episode: 1874/1000, score: 80, cumulative reward: -129.67214420901138
Episode: 1875/1000, score: 79, cumulative reward: -216.57618215743884
Episode: 1876/1000, score: 121, cumulative reward: -101.44275852138648
Episode: 1877/1000, score: 159, cumulative reward: -87.41718537265686
Episode: 1878/1000, score: 89, cumulative reward: -155.16276238130382
Episode: 1879/1000, score: 75, cumulative reward: -126.55696913643496
Episode: 1880/1000, score: 65, cumulative reward: -59.70297362606712
Episode: 1881/1000, score: 64, cumulative reward: -124.82773250977567
Episode: 1882/1000, score: 63, cumulative reward: -172.4178697550927
Episode: 1883/1000, score: 195, cumulative reward: -428.8484490488828
Episode: 1884/1000, score: 53, cumulative reward: -137.39180825914283
Episode: 1885/1000, score: 235, cumulative reward: -460.95888256929175
Episode: 1886/1000, score: 126, cumulative reward: -349.84206288884786
Episode: 1887/1000, score: 158, cumulative reward: -338.11500040241685
Episode: 1888/1000, score: 130, cumulative reward: -375.16180820111276
Episode: 1889/1000, score: 83, cumulative reward: -579.3209762753208
Episode: 1890/1000, score: 138, cumulative reward: -395.0173230767358
Episode: 1891/1000, score: 137, cumulative reward: -119.64199274393101
Episode: 1892/1000, score: 133, cumulative reward: -392.45696920819535
Episode: 1893/1000, score: 134, cumulative reward: -178.33041100100723
Episode: 1894/1000, score: 66, cumulative reward: -194.7488853350577
Episode: 1895/1000, score: 174, cumulative reward: 42.59819797805659
Episode: 1896/1000, score: 78, cumulative reward: -86.75840154670387
Episode: 1897/1000, score: 107, cumulative reward: -108.37296038458587
Episode: 1898/1000, score: 54, cumulative reward: -178.27182279927297
Episode: 1899/1000, score: 172, cumulative reward: -195.92786371989513
Episode: 1900/1000, score: 111, cumulative reward: -113.10321670481889
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 1901/1000, score: 167, cumulative reward: -199.19898057854996
Episode: 1902/1000, score: 231, cumulative reward: -301.7089677788414
Episode: 1903/1000, score: 192, cumulative reward: -109.30109109562375
Episode: 1904/1000, score: 62, cumulative reward: -313.0561924006069
Episode: 1905/1000, score: 196, cumulative reward: -509.60153091101444
Episode: 1906/1000, score: 174, cumulative reward: -262.96304194133455
Episode: 1907/1000, score: 55, cumulative reward: -121.16851086781323
Episode: 1908/1000, score: 112, cumulative reward: -453.4269280905515
Episode: 1909/1000, score: 208, cumulative reward: -285.32976288423856
Episode: 1910/1000, score: 193, cumulative reward: -212.48143376490765
Episode: 1911/1000, score: 75, cumulative reward: -157.24915589690528
Episode: 1912/1000, score: 55, cumulative reward: -157.31637577478423
Episode: 1913/1000, score: 164, cumulative reward: -234.46278385651303
Episode: 1914/1000, score: 106, cumulative reward: -655.2681076891039
Episode: 1915/1000, score: 64, cumulative reward: -200.32108453794473
Episode: 1916/1000, score: 79, cumulative reward: -459.31384564882393
Episode: 1917/1000, score: 91, cumulative reward: -246.28576235548195
Episode: 1918/1000, score: 74, cumulative reward: -312.7496293863556
Episode: 1919/1000, score: 113, cumulative reward: -479.3567853775732
Episode: 1920/1000, score: 111, cumulative reward: -357.97698541666637
Episode: 1921/1000, score: 62, cumulative reward: -354.15526661782246
Episode: 1922/1000, score: 98, cumulative reward: -143.1219420202264
Episode: 1923/1000, score: 118, cumulative reward: -461.96147827452086
Episode: 1924/1000, score: 85, cumulative reward: -256.97795685119786
Episode: 1925/1000, score: 139, cumulative reward: -327.8744507153662
Episode: 1926/1000, score: 112, cumulative reward: -376.1489608848213
Episode: 1927/1000, score: 146, cumulative reward: -8.681271742407901
Episode: 1928/1000, score: 98, cumulative reward: -304.8351308819771
Episode: 1929/1000, score: 96, cumulative reward: -187.79007807001005
Episode: 1930/1000, score: 58, cumulative reward: -351.9171465284147
Episode: 1931/1000, score: 99, cumulative reward: -315.3313448318761
Episode: 1932/1000, score: 61, cumulative reward: -137.84037106062277
Episode: 1933/1000, score: 72, cumulative reward: -128.89855407381873
Episode: 1934/1000, score: 56, cumulative reward: -117.62739523991803
Episode: 1935/1000, score: 61, cumulative reward: 14.826005712518239
Episode: 1936/1000, score: 74, cumulative reward: -634.8644635580365
Episode: 1937/1000, score: 81, cumulative reward: -361.05340803387486
Episode: 1938/1000, score: 56, cumulative reward: -402.68636189481066
Episode: 1939/1000, score: 58, cumulative reward: -266.49192294940605
Episode: 1940/1000, score: 135, cumulative reward: -528.1383379107689
Episode: 1941/1000, score: 62, cumulative reward: -313.44924354576165
Episode: 1942/1000, score: 51, cumulative reward: -289.83496539612054
Episode: 1943/1000, score: 73, cumulative reward: -426.50555217178936
Episode: 1944/1000, score: 63, cumulative reward: -297.244723703952
Episode: 1945/1000, score: 133, cumulative reward: -324.9192250246335
Episode: 1946/1000, score: 83, cumulative reward: -438.64952160026587
Episode: 1947/1000, score: 114, cumulative reward: -320.77007394462566
Episode: 1948/1000, score: 73, cumulative reward: -224.59906096562284
Episode: 1949/1000, score: 125, cumulative reward: -275.60844928571845
Episode: 1950/1000, score: 51, cumulative reward: -77.16139182420433
Episode: 1951/1000, score: 127, cumulative reward: -261.3912887122941
Episode: 1952/1000, score: 64, cumulative reward: -239.82403263859462
Episode: 1953/1000, score: 78, cumulative reward: -243.33930011990853
Episode: 1954/1000, score: 59, cumulative reward: -153.9260365380778
Episode: 1955/1000, score: 110, cumulative reward: -537.4804653774986
Episode: 1956/1000, score: 70, cumulative reward: -610.7685434392536
Episode: 1957/1000, score: 91, cumulative reward: -294.27224961198567
Episode: 1958/1000, score: 78, cumulative reward: -492.57855006815714
Episode: 1959/1000, score: 78, cumulative reward: -282.47094838789224
Episode: 1960/1000, score: 55, cumulative reward: -249.9658482685809
Episode: 1961/1000, score: 55, cumulative reward: -99.34620836962971
Episode: 1962/1000, score: 86, cumulative reward: -792.0963316405772
Episode: 1963/1000, score: 89, cumulative reward: -26.558734207627538
Episode: 1964/1000, score: 164, cumulative reward: -24.20935734051197
Episode: 1965/1000, score: 61, cumulative reward: -353.3730841736699
Episode: 1966/1000, score: 61, cumulative reward: -144.90611674906017
Episode: 1967/1000, score: 73, cumulative reward: -239.12685323205054
Episode: 1968/1000, score: 55, cumulative reward: -180.09939473268767
Episode: 1969/1000, score: 81, cumulative reward: -119.94279125774347
Episode: 1970/1000, score: 63, cumulative reward: -146.9988847840596
Episode: 1971/1000, score: 97, cumulative reward: -92.90885208185459
Episode: 1972/1000, score: 85, cumulative reward: -51.86912160188212
Episode: 1973/1000, score: 118, cumulative reward: -221.53818503749076
Episode: 1974/1000, score: 50, cumulative reward: -33.59695583946231
Episode: 1975/1000, score: 122, cumulative reward: -113.48997929686057
Episode: 1976/1000, score: 70, cumulative reward: -173.418002047159
Episode: 1977/1000, score: 94, cumulative reward: -181.3663582445971
Episode: 1978/1000, score: 55, cumulative reward: -431.547058547336
Episode: 1979/1000, score: 134, cumulative reward: -218.43361201155625
Episode: 1980/1000, score: 66, cumulative reward: -341.1460848650786
Episode: 1981/1000, score: 87, cumulative reward: -144.60756923527842
Episode: 1982/1000, score: 162, cumulative reward: -237.44007856770116
Episode: 1983/1000, score: 60, cumulative reward: -201.55944247469017
Episode: 1984/1000, score: 87, cumulative reward: -284.66916848024005
Episode: 1985/1000, score: 82, cumulative reward: -217.24339530379208
Episode: 1986/1000, score: 67, cumulative reward: -65.2663915212548
Episode: 1987/1000, score: 69, cumulative reward: -127.10677873511654
Episode: 1988/1000, score: 59, cumulative reward: -165.37825626869727
Episode: 1989/1000, score: 68, cumulative reward: -327.57866762994695
Episode: 1990/1000, score: 72, cumulative reward: -154.54635913920254
Episode: 1991/1000, score: 57, cumulative reward: -385.9978418973668
Episode: 1992/1000, score: 140, cumulative reward: -331.50555769430696
Episode: 1993/1000, score: 162, cumulative reward: -164.0052115883502
Episode: 1994/1000, score: 79, cumulative reward: -132.9963957391572
Episode: 1995/1000, score: 105, cumulative reward: -254.7351685416034
Episode: 1996/1000, score: 61, cumulative reward: -210.95052236436214
Episode: 1997/1000, score: 65, cumulative reward: -163.80787243111016
Episode: 1998/1000, score: 54, cumulative reward: -363.93720909391675
Episode: 1999/1000, score: 77, cumulative reward: -9.689976464850545
Episode: 2000/1000, score: 76, cumulative reward: -155.96300499957135
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2001/1000, score: 79, cumulative reward: -508.35216795404443
Episode: 2002/1000, score: 128, cumulative reward: -402.21249386623214
Episode: 2003/1000, score: 51, cumulative reward: -128.17635180044698
Episode: 2004/1000, score: 67, cumulative reward: -7.346645928669375
Episode: 2005/1000, score: 86, cumulative reward: -633.314980753327
Episode: 2006/1000, score: 62, cumulative reward: -332.57011485017074
Episode: 2007/1000, score: 64, cumulative reward: -35.320655321355275
Episode: 2008/1000, score: 142, cumulative reward: -154.93290291522652
Episode: 2009/1000, score: 54, cumulative reward: -490.3175391371157
Episode: 2010/1000, score: 85, cumulative reward: -196.86280019076287
Episode: 2011/1000, score: 128, cumulative reward: -258.4446296313609
Episode: 2012/1000, score: 55, cumulative reward: -118.878231049925
Episode: 2013/1000, score: 57, cumulative reward: -118.92803748312379
Episode: 2014/1000, score: 143, cumulative reward: -441.8332161326264
Episode: 2015/1000, score: 115, cumulative reward: -367.08246477988837
Episode: 2016/1000, score: 83, cumulative reward: -487.6582463873327
Episode: 2017/1000, score: 85, cumulative reward: -148.35571430437093
Episode: 2018/1000, score: 112, cumulative reward: -348.2909697194734
Episode: 2019/1000, score: 78, cumulative reward: -313.2408369732909
Episode: 2020/1000, score: 144, cumulative reward: -255.64943620673296
Episode: 2021/1000, score: 101, cumulative reward: -180.6723548443537
Episode: 2022/1000, score: 98, cumulative reward: -188.13069947356382
Episode: 2023/1000, score: 71, cumulative reward: -64.39418686233142
Episode: 2024/1000, score: 71, cumulative reward: -187.83218864237236
Episode: 2025/1000, score: 50, cumulative reward: -145.90174373400458
Episode: 2026/1000, score: 85, cumulative reward: -272.8426472910445
Episode: 2027/1000, score: 101, cumulative reward: -487.2519654184604
Episode: 2028/1000, score: 75, cumulative reward: -387.71966945935344
Episode: 2029/1000, score: 114, cumulative reward: -148.19167292956982
Episode: 2030/1000, score: 83, cumulative reward: -353.43414074782993
Episode: 2031/1000, score: 68, cumulative reward: -149.741042776315
Episode: 2032/1000, score: 55, cumulative reward: -265.6320429157351
Episode: 2033/1000, score: 70, cumulative reward: -480.96604992643944
Episode: 2034/1000, score: 125, cumulative reward: -275.98650724733
Episode: 2035/1000, score: 96, cumulative reward: -348.0166148769396
Episode: 2036/1000, score: 60, cumulative reward: -246.94434313087177
Episode: 2037/1000, score: 77, cumulative reward: 0.5374783706123338
Episode: 2038/1000, score: 102, cumulative reward: -356.03060244270887
Episode: 2039/1000, score: 98, cumulative reward: -254.1934974215289
Episode: 2040/1000, score: 60, cumulative reward: -133.16785759031666
Episode: 2041/1000, score: 85, cumulative reward: -123.72628930859126
Episode: 2042/1000, score: 59, cumulative reward: -156.28833407740382
Episode: 2043/1000, score: 63, cumulative reward: -174.80412102831525
Episode: 2044/1000, score: 65, cumulative reward: -134.81264709487593
Episode: 2045/1000, score: 59, cumulative reward: -212.97682966104108
Episode: 2046/1000, score: 73, cumulative reward: -273.8548504146325
Episode: 2047/1000, score: 125, cumulative reward: -221.04058718468957
Episode: 2048/1000, score: 74, cumulative reward: -381.2016389700477
Episode: 2049/1000, score: 61, cumulative reward: -354.65438346790984
Episode: 2050/1000, score: 88, cumulative reward: -13.670671112222806
Episode: 2051/1000, score: 85, cumulative reward: -69.17617143676495
Episode: 2052/1000, score: 88, cumulative reward: -213.84260747891815
Episode: 2053/1000, score: 64, cumulative reward: -138.14194514395186
Episode: 2054/1000, score: 57, cumulative reward: -114.69751179164973
Episode: 2055/1000, score: 99, cumulative reward: -120.71697063251067
Episode: 2056/1000, score: 112, cumulative reward: -168.2662970731286
Episode: 2057/1000, score: 53, cumulative reward: -192.28093345775488
Episode: 2058/1000, score: 105, cumulative reward: -141.19228044473405
Episode: 2059/1000, score: 151, cumulative reward: -271.7623577421049
Episode: 2060/1000, score: 51, cumulative reward: -125.92181513170816
Episode: 2061/1000, score: 269, cumulative reward: -458.6305615801642
Episode: 2062/1000, score: 83, cumulative reward: -154.6841393500329
Episode: 2063/1000, score: 78, cumulative reward: -85.24016047841492
Episode: 2064/1000, score: 152, cumulative reward: -191.7804622358037
Episode: 2065/1000, score: 89, cumulative reward: -121.2568342842749
Episode: 2066/1000, score: 88, cumulative reward: -146.19510244662717
Episode: 2067/1000, score: 172, cumulative reward: -192.27322945722622
Episode: 2068/1000, score: 64, cumulative reward: -120.95278285612855
Episode: 2069/1000, score: 145, cumulative reward: -77.55224398937696
Episode: 2070/1000, score: 118, cumulative reward: -265.85923692049346
Episode: 2071/1000, score: 70, cumulative reward: -291.3350078114617
Episode: 2072/1000, score: 104, cumulative reward: -194.99351953170708
Episode: 2073/1000, score: 407, cumulative reward: -229.36169973643428
Episode: 2074/1000, score: 61, cumulative reward: -168.72364182956403
Episode: 2075/1000, score: 62, cumulative reward: -130.27474779549556
Episode: 2076/1000, score: 155, cumulative reward: -300.8145485368226
Episode: 2077/1000, score: 95, cumulative reward: -87.8288570665838
Episode: 2078/1000, score: 74, cumulative reward: -123.6517840067632
Episode: 2079/1000, score: 99, cumulative reward: -137.38031311735188
Episode: 2080/1000, score: 124, cumulative reward: -181.31741623863462
Episode: 2081/1000, score: 72, cumulative reward: -128.56922668231147
Episode: 2082/1000, score: 54, cumulative reward: -190.35087570121584
Episode: 2083/1000, score: 64, cumulative reward: -261.70670164224157
Episode: 2084/1000, score: 113, cumulative reward: -142.2209756117956
Episode: 2085/1000, score: 217, cumulative reward: -352.69102358592806
Episode: 2086/1000, score: 158, cumulative reward: -388.25442876021265
Episode: 2087/1000, score: 136, cumulative reward: -324.5132670982535
Episode: 2088/1000, score: 125, cumulative reward: -341.9555881971968
Episode: 2089/1000, score: 67, cumulative reward: -160.45225607201118
Episode: 2090/1000, score: 73, cumulative reward: -122.77773182198567
Episode: 2091/1000, score: 130, cumulative reward: -208.12222009352968
Episode: 2092/1000, score: 192, cumulative reward: -368.865743027435
Episode: 2093/1000, score: 101, cumulative reward: -234.59185197541063
Episode: 2094/1000, score: 74, cumulative reward: -191.72875581009183
Episode: 2095/1000, score: 192, cumulative reward: -65.89839200628097
Episode: 2096/1000, score: 179, cumulative reward: -117.14190572743959
Episode: 2097/1000, score: 55, cumulative reward: -236.3019490965058
Episode: 2098/1000, score: 76, cumulative reward: -207.37711777383592
Episode: 2099/1000, score: 128, cumulative reward: -176.98360218968895
Episode: 2100/1000, score: 172, cumulative reward: -108.70473362738167
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2101/1000, score: 91, cumulative reward: -143.3408562603738
Episode: 2102/1000, score: 199, cumulative reward: -130.4220752321226
Episode: 2103/1000, score: 140, cumulative reward: -399.5455093760622
Episode: 2104/1000, score: 205, cumulative reward: -153.72930215789148
Episode: 2105/1000, score: 74, cumulative reward: -206.0997008259091
Episode: 2106/1000, score: 105, cumulative reward: -378.33822357145624
Episode: 2107/1000, score: 147, cumulative reward: -218.61338806498793
Episode: 2108/1000, score: 84, cumulative reward: -94.27780763567347
Episode: 2109/1000, score: 165, cumulative reward: -165.10345844778746
Episode: 2110/1000, score: 162, cumulative reward: -215.3142165469838
Episode: 2111/1000, score: 107, cumulative reward: -156.59923957340504
Episode: 2112/1000, score: 265, cumulative reward: -140.0069769149032
Episode: 2113/1000, score: 83, cumulative reward: -122.28635638715997
Episode: 2114/1000, score: 246, cumulative reward: -317.9344519698881
Episode: 2115/1000, score: 117, cumulative reward: -114.18419265909337
Episode: 2116/1000, score: 87, cumulative reward: -142.3632223111432
Episode: 2117/1000, score: 77, cumulative reward: -115.82002122574653
Episode: 2118/1000, score: 67, cumulative reward: -127.22479843495404
Episode: 2119/1000, score: 69, cumulative reward: -128.21729126587314
Episode: 2120/1000, score: 84, cumulative reward: -459.88741153738965
Episode: 2121/1000, score: 82, cumulative reward: -502.5837322454897
Episode: 2122/1000, score: 136, cumulative reward: -623.9858897208377
Episode: 2123/1000, score: 85, cumulative reward: -163.85555902879798
Episode: 2124/1000, score: 66, cumulative reward: -271.9235102359001
Episode: 2125/1000, score: 86, cumulative reward: -106.40138241739791
Episode: 2126/1000, score: 123, cumulative reward: -227.40817740982015
Episode: 2127/1000, score: 108, cumulative reward: -392.30566279786314
Episode: 2128/1000, score: 128, cumulative reward: -60.62207496813903
Episode: 2129/1000, score: 173, cumulative reward: -259.174729898527
Episode: 2130/1000, score: 82, cumulative reward: -141.98358256651218
Episode: 2131/1000, score: 67, cumulative reward: -160.95160246947626
Episode: 2132/1000, score: 59, cumulative reward: -258.4838018718699
Episode: 2133/1000, score: 85, cumulative reward: -132.25772912782654
Episode: 2134/1000, score: 68, cumulative reward: -341.7044279008806
Episode: 2135/1000, score: 137, cumulative reward: -153.0984560776785
Episode: 2136/1000, score: 82, cumulative reward: -522.9644932068222
Episode: 2137/1000, score: 96, cumulative reward: -93.1208399168292
Episode: 2138/1000, score: 77, cumulative reward: -146.90208124884995
Episode: 2139/1000, score: 202, cumulative reward: -113.11890210929303
Episode: 2140/1000, score: 155, cumulative reward: -349.28821033289876
Episode: 2141/1000, score: 81, cumulative reward: -97.01572603122996
Episode: 2142/1000, score: 299, cumulative reward: -436.04623587904575
Episode: 2143/1000, score: 99, cumulative reward: -203.1759248819632
Episode: 2144/1000, score: 58, cumulative reward: -97.44691260830157
Episode: 2145/1000, score: 126, cumulative reward: -107.91283260955002
Episode: 2146/1000, score: 104, cumulative reward: -166.4442425665266
Episode: 2147/1000, score: 85, cumulative reward: -421.1244692528738
Episode: 2148/1000, score: 49, cumulative reward: -107.14870661926638
Episode: 2149/1000, score: 119, cumulative reward: -186.88280812822597
Episode: 2150/1000, score: 155, cumulative reward: -75.79694180301547
Episode: 2151/1000, score: 70, cumulative reward: -141.33507320694378
Episode: 2152/1000, score: 403, cumulative reward: -233.79506324566162
Episode: 2153/1000, score: 118, cumulative reward: -687.6746316857717
Episode: 2154/1000, score: 91, cumulative reward: -69.64200840482741
Episode: 2155/1000, score: 253, cumulative reward: -255.7131628927599
Episode: 2156/1000, score: 231, cumulative reward: -283.52323176280987
Episode: 2157/1000, score: 90, cumulative reward: -114.37738725618335
Episode: 2158/1000, score: 128, cumulative reward: -36.502179713432355
Episode: 2159/1000, score: 244, cumulative reward: -317.6065478978789
Episode: 2160/1000, score: 168, cumulative reward: -56.99334433482501
Episode: 2161/1000, score: 130, cumulative reward: -99.87847200468859
Episode: 2162/1000, score: 89, cumulative reward: -139.18922976344365
Episode: 2163/1000, score: 106, cumulative reward: -244.76336675025183
Episode: 2164/1000, score: 232, cumulative reward: -230.2726648557042
Episode: 2165/1000, score: 65, cumulative reward: -99.92463942910634
Episode: 2166/1000, score: 71, cumulative reward: -111.8530587024787
Episode: 2167/1000, score: 110, cumulative reward: -161.69319492118376
Episode: 2168/1000, score: 269, cumulative reward: -296.8218857092297
Episode: 2169/1000, score: 100, cumulative reward: -91.86728740772693
Episode: 2170/1000, score: 165, cumulative reward: -285.3539292793538
Episode: 2171/1000, score: 169, cumulative reward: -266.2498250142934
Episode: 2172/1000, score: 102, cumulative reward: -254.13789492008874
Episode: 2173/1000, score: 107, cumulative reward: -306.4343329130583
Episode: 2174/1000, score: 223, cumulative reward: -0.9177254955323662
Episode: 2175/1000, score: 288, cumulative reward: -236.58294908601584
Episode: 2176/1000, score: 201, cumulative reward: -258.31555005939447
Episode: 2177/1000, score: 93, cumulative reward: -28.48029793659171
Episode: 2178/1000, score: 102, cumulative reward: -66.7372209544945
Episode: 2179/1000, score: 128, cumulative reward: -62.012006072049786
Episode: 2180/1000, score: 208, cumulative reward: -162.08844697646768
Episode: 2181/1000, score: 145, cumulative reward: -39.108340723373985
Episode: 2182/1000, score: 85, cumulative reward: -106.82357764990317
Episode: 2183/1000, score: 336, cumulative reward: -260.15221322263926
Episode: 2184/1000, score: 95, cumulative reward: -212.75083119734583
Episode: 2185/1000, score: 136, cumulative reward: -219.73383306365187
Episode: 2186/1000, score: 162, cumulative reward: -21.222282923025645
Episode: 2187/1000, score: 182, cumulative reward: -470.4635557163077
Episode: 2188/1000, score: 83, cumulative reward: -198.76279466957817
Episode: 2189/1000, score: 100, cumulative reward: -111.77397324563738
Episode: 2190/1000, score: 89, cumulative reward: -90.66741102677967
Episode: 2191/1000, score: 123, cumulative reward: -202.0755380455672
Episode: 2192/1000, score: 66, cumulative reward: -422.02581993337265
Episode: 2193/1000, score: 145, cumulative reward: -369.06608381213215
Episode: 2194/1000, score: 413, cumulative reward: -427.0540017741637
Episode: 2195/1000, score: 83, cumulative reward: -264.2287817175364
Episode: 2196/1000, score: 147, cumulative reward: -159.24295653932126
Episode: 2197/1000, score: 110, cumulative reward: -181.2490237456252
Episode: 2198/1000, score: 124, cumulative reward: -177.30123806016235
Episode: 2199/1000, score: 185, cumulative reward: -190.48832503197292
Episode: 2200/1000, score: 235, cumulative reward: -133.7073211575705
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2201/1000, score: 159, cumulative reward: -189.82973501213962
Episode: 2202/1000, score: 111, cumulative reward: -176.46292369524747
Episode: 2203/1000, score: 190, cumulative reward: -291.4502674227574
Episode: 2204/1000, score: 110, cumulative reward: -266.48013312780364
Episode: 2205/1000, score: 105, cumulative reward: -301.692604407314
Episode: 2206/1000, score: 242, cumulative reward: -245.68195573733098
Episode: 2207/1000, score: 378, cumulative reward: -427.9571608297274
Episode: 2208/1000, score: 230, cumulative reward: -284.25253413915414
Episode: 2209/1000, score: 99, cumulative reward: -405.27148382077803
Episode: 2210/1000, score: 155, cumulative reward: -33.40980686118881
Episode: 2211/1000, score: 85, cumulative reward: -72.35336605174409
Episode: 2212/1000, score: 69, cumulative reward: -291.717802960311
Episode: 2213/1000, score: 250, cumulative reward: -415.8800938072677
Episode: 2214/1000, score: 142, cumulative reward: -169.26762054533336
Episode: 2215/1000, score: 65, cumulative reward: -360.2192499390878
Episode: 2216/1000, score: 244, cumulative reward: -172.91109220928297
Episode: 2217/1000, score: 108, cumulative reward: -282.5482594679527
Episode: 2218/1000, score: 126, cumulative reward: -143.4851451489379
Episode: 2219/1000, score: 105, cumulative reward: -291.29138333355917
Episode: 2220/1000, score: 108, cumulative reward: -249.18135305311833
Episode: 2221/1000, score: 64, cumulative reward: -89.98544913794615
Episode: 2222/1000, score: 113, cumulative reward: -169.066916608732
Episode: 2223/1000, score: 54, cumulative reward: -321.4984218204285
Episode: 2224/1000, score: 105, cumulative reward: -526.4672746457068
Episode: 2225/1000, score: 83, cumulative reward: -160.0005249841118
Episode: 2226/1000, score: 64, cumulative reward: -164.59160547666775
Episode: 2227/1000, score: 93, cumulative reward: -179.49838202973243
Episode: 2228/1000, score: 76, cumulative reward: -387.12012848947313
Episode: 2229/1000, score: 165, cumulative reward: -64.37012769350878
Episode: 2230/1000, score: 70, cumulative reward: -183.98716786562002
Episode: 2231/1000, score: 104, cumulative reward: -233.11434320099733
Episode: 2232/1000, score: 98, cumulative reward: -137.21757802605825
Episode: 2233/1000, score: 60, cumulative reward: -113.53742428516205
Episode: 2234/1000, score: 83, cumulative reward: -137.10210772515455
Episode: 2235/1000, score: 47, cumulative reward: -381.377021686743
Episode: 2236/1000, score: 49, cumulative reward: -420.7146647787133
Episode: 2237/1000, score: 81, cumulative reward: -125.547055797095
Episode: 2238/1000, score: 61, cumulative reward: -408.60471353396696
Episode: 2239/1000, score: 64, cumulative reward: -88.69359075800348
Episode: 2240/1000, score: 71, cumulative reward: -114.70244111832406
Episode: 2241/1000, score: 64, cumulative reward: -238.93889171885465
Episode: 2242/1000, score: 123, cumulative reward: -137.1118668104667
Episode: 2243/1000, score: 121, cumulative reward: -311.8022266540287
Episode: 2244/1000, score: 90, cumulative reward: -115.08150378187585
Episode: 2245/1000, score: 62, cumulative reward: -471.05849449240895
Episode: 2246/1000, score: 61, cumulative reward: -386.0872764992522
Episode: 2247/1000, score: 69, cumulative reward: -204.84917421942453
Episode: 2248/1000, score: 59, cumulative reward: -140.7494052430994
Episode: 2249/1000, score: 89, cumulative reward: -173.13771841789145
Episode: 2250/1000, score: 90, cumulative reward: -218.7085429504263
Episode: 2251/1000, score: 53, cumulative reward: -131.14548036456713
Episode: 2252/1000, score: 68, cumulative reward: -177.79804894910757
Episode: 2253/1000, score: 79, cumulative reward: -240.31860996558683
Episode: 2254/1000, score: 62, cumulative reward: -107.08237028738961
Episode: 2255/1000, score: 89, cumulative reward: -276.4736258046695
Episode: 2256/1000, score: 127, cumulative reward: -316.3803441993074
Episode: 2257/1000, score: 120, cumulative reward: -137.27421731763178
Episode: 2258/1000, score: 74, cumulative reward: -111.17649683564193
Episode: 2259/1000, score: 57, cumulative reward: -131.16374502013662
Episode: 2260/1000, score: 70, cumulative reward: -509.2251460405189
Episode: 2261/1000, score: 77, cumulative reward: -157.59866369395792
Episode: 2262/1000, score: 68, cumulative reward: -102.00013314306543
Episode: 2263/1000, score: 60, cumulative reward: -110.88425262545522
Episode: 2264/1000, score: 88, cumulative reward: -248.7163047220804
Episode: 2265/1000, score: 126, cumulative reward: -192.11814964728933
Episode: 2266/1000, score: 122, cumulative reward: -138.6270752069613
Episode: 2267/1000, score: 84, cumulative reward: -288.57108691895235
Episode: 2268/1000, score: 95, cumulative reward: -84.37853155378998
Episode: 2269/1000, score: 49, cumulative reward: -329.72110803462
Episode: 2270/1000, score: 89, cumulative reward: -9.954066619049385
Episode: 2271/1000, score: 58, cumulative reward: -136.94126105144085
Episode: 2272/1000, score: 104, cumulative reward: 15.362551211774104
Episode: 2273/1000, score: 54, cumulative reward: -151.49917654872647
Episode: 2274/1000, score: 128, cumulative reward: -296.2314973896027
Episode: 2275/1000, score: 115, cumulative reward: -228.131021007963
Episode: 2276/1000, score: 82, cumulative reward: -276.32237288751315
Episode: 2277/1000, score: 90, cumulative reward: -193.83349277854958
Episode: 2278/1000, score: 92, cumulative reward: -166.59296113294533
Episode: 2279/1000, score: 193, cumulative reward: -534.6271691297261
Episode: 2280/1000, score: 60, cumulative reward: -178.5077855540606
Episode: 2281/1000, score: 75, cumulative reward: -280.52721201620665
Episode: 2282/1000, score: 122, cumulative reward: -215.25977489742525
Episode: 2283/1000, score: 57, cumulative reward: -120.24132587303978
Episode: 2284/1000, score: 92, cumulative reward: -191.07732125681264
Episode: 2285/1000, score: 88, cumulative reward: -151.8914182691925
Episode: 2286/1000, score: 197, cumulative reward: -278.4976847039087
Episode: 2287/1000, score: 55, cumulative reward: -152.2101223683399
Episode: 2288/1000, score: 143, cumulative reward: -364.54999176125665
Episode: 2289/1000, score: 180, cumulative reward: -197.34958478740253
Episode: 2290/1000, score: 76, cumulative reward: -153.9614933867931
Episode: 2291/1000, score: 65, cumulative reward: -98.49849940209342
Episode: 2292/1000, score: 90, cumulative reward: -118.5495316107455
Episode: 2293/1000, score: 83, cumulative reward: -868.0504006333588
Episode: 2294/1000, score: 89, cumulative reward: -206.94962431457873
Episode: 2295/1000, score: 108, cumulative reward: -208.69644495759744
Episode: 2296/1000, score: 63, cumulative reward: -116.8143453648697
Episode: 2297/1000, score: 88, cumulative reward: -264.81033450933177
Episode: 2298/1000, score: 60, cumulative reward: -130.95701602172386
Episode: 2299/1000, score: 115, cumulative reward: -146.97276449850276
Episode: 2300/1000, score: 128, cumulative reward: -246.41801755790627
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2301/1000, score: 61, cumulative reward: -220.15654292821858
Episode: 2302/1000, score: 72, cumulative reward: -122.9290278705484
Episode: 2303/1000, score: 61, cumulative reward: -145.16433037544363
Episode: 2304/1000, score: 87, cumulative reward: -525.0617164806162
Episode: 2305/1000, score: 167, cumulative reward: -99.93276526930379
Episode: 2306/1000, score: 146, cumulative reward: -126.57035853885485
Episode: 2307/1000, score: 159, cumulative reward: -330.1320011715683
Episode: 2308/1000, score: 96, cumulative reward: -141.97983286860023
Episode: 2309/1000, score: 119, cumulative reward: -220.5074779866773
Episode: 2310/1000, score: 207, cumulative reward: -94.53600739098577
Episode: 2311/1000, score: 113, cumulative reward: -296.01078231233
Episode: 2312/1000, score: 316, cumulative reward: -97.02314661257151
Episode: 2313/1000, score: 81, cumulative reward: -181.2801670988356
Episode: 2314/1000, score: 255, cumulative reward: -70.0604098082544
Episode: 2315/1000, score: 74, cumulative reward: -145.15057884709978
Episode: 2316/1000, score: 153, cumulative reward: -50.65340018398197
Episode: 2317/1000, score: 179, cumulative reward: -78.28984456018159
Episode: 2318/1000, score: 130, cumulative reward: -179.6030569455957
Episode: 2319/1000, score: 136, cumulative reward: -160.7374526015884
Episode: 2320/1000, score: 79, cumulative reward: -131.12098426635285
Episode: 2321/1000, score: 93, cumulative reward: -203.95794314017442
Episode: 2322/1000, score: 375, cumulative reward: -162.04107860032065
Episode: 2323/1000, score: 103, cumulative reward: -240.78730097005078
Episode: 2324/1000, score: 111, cumulative reward: -219.31329421848977
Episode: 2325/1000, score: 78, cumulative reward: -216.00299339997548
Episode: 2326/1000, score: 76, cumulative reward: -76.33030279719861
Episode: 2327/1000, score: 392, cumulative reward: -364.7017256904905
Episode: 2328/1000, score: 91, cumulative reward: -151.77456751110708
Episode: 2329/1000, score: 140, cumulative reward: -200.39006549718718
Episode: 2330/1000, score: 75, cumulative reward: -79.59497985197697
Episode: 2331/1000, score: 140, cumulative reward: -53.97760288369942
Episode: 2332/1000, score: 266, cumulative reward: -300.3287104393944
Episode: 2333/1000, score: 205, cumulative reward: -155.89867327604753
Episode: 2334/1000, score: 160, cumulative reward: -290.4241619503964
Episode: 2335/1000, score: 256, cumulative reward: -300.1951346964817
Episode: 2336/1000, score: 107, cumulative reward: -72.10805418036823
Episode: 2337/1000, score: 156, cumulative reward: -281.5314520225532
Episode: 2338/1000, score: 120, cumulative reward: -250.06539400780923
Episode: 2339/1000, score: 190, cumulative reward: -244.64672719251004
Episode: 2340/1000, score: 320, cumulative reward: -234.63802335167472
Episode: 2341/1000, score: 466, cumulative reward: -222.39050608396786
Episode: 2342/1000, score: 176, cumulative reward: -169.69044472804708
Episode: 2343/1000, score: 258, cumulative reward: -264.668895799281
Episode: 2344/1000, score: 305, cumulative reward: -102.8372208583393
Episode: 2345/1000, score: 89, cumulative reward: -194.30748174284986
Episode: 2346/1000, score: 310, cumulative reward: -148.88095740210167
Episode: 2347/1000, score: 89, cumulative reward: -442.91650675194114
Episode: 2348/1000, score: 296, cumulative reward: -460.44484574893187
Episode: 2349/1000, score: 140, cumulative reward: -456.11220831664707
Episode: 2350/1000, score: 226, cumulative reward: -300.1069505637234
Episode: 2351/1000, score: 107, cumulative reward: -32.5775031257662
Episode: 2352/1000, score: 144, cumulative reward: -428.0338022227468
Episode: 2353/1000, score: 67, cumulative reward: -177.0773650237303
Episode: 2354/1000, score: 313, cumulative reward: -429.27974012797125
Episode: 2355/1000, score: 365, cumulative reward: -243.2627768005461
Episode: 2356/1000, score: 117, cumulative reward: -169.18635258872794
Episode: 2357/1000, score: 174, cumulative reward: -181.341314570258
Episode: 2358/1000, score: 193, cumulative reward: -232.35920701195778
Episode: 2359/1000, score: 195, cumulative reward: -428.86587162529736
Episode: 2360/1000, score: 82, cumulative reward: -391.4612630390009
Episode: 2361/1000, score: 179, cumulative reward: -23.54444297036673
Episode: 2362/1000, score: 98, cumulative reward: -158.54110418523055
Episode: 2363/1000, score: 226, cumulative reward: -150.01172505647614
Episode: 2364/1000, score: 91, cumulative reward: -198.55809229202686
Episode: 2365/1000, score: 97, cumulative reward: -265.21830590401305
Episode: 2366/1000, score: 86, cumulative reward: -74.2720203406708
Episode: 2367/1000, score: 212, cumulative reward: -163.9035687371097
Episode: 2368/1000, score: 133, cumulative reward: -236.17664604564644
Episode: 2369/1000, score: 106, cumulative reward: -132.09150999412884
Episode: 2370/1000, score: 61, cumulative reward: -212.17164456866314
Episode: 2371/1000, score: 107, cumulative reward: -60.29855889805444
Episode: 2372/1000, score: 103, cumulative reward: -295.37281674698943
Episode: 2373/1000, score: 190, cumulative reward: -193.95370807639327
Episode: 2374/1000, score: 156, cumulative reward: -247.64436672882096
Episode: 2375/1000, score: 143, cumulative reward: -162.4893135542111
Episode: 2376/1000, score: 61, cumulative reward: -85.45931019083976
Episode: 2377/1000, score: 207, cumulative reward: -156.2927686716866
Episode: 2378/1000, score: 114, cumulative reward: -111.63512240554407
Episode: 2379/1000, score: 96, cumulative reward: -49.28689090464013
Episode: 2380/1000, score: 66, cumulative reward: -166.96059170299907
Episode: 2381/1000, score: 76, cumulative reward: -114.472300998646
Episode: 2382/1000, score: 52, cumulative reward: -217.655668453648
Episode: 2383/1000, score: 92, cumulative reward: -51.864879751227285
Episode: 2384/1000, score: 177, cumulative reward: -3.798526419334209
Episode: 2385/1000, score: 145, cumulative reward: -110.69820604948289
Episode: 2386/1000, score: 85, cumulative reward: -21.27015415338235
Episode: 2387/1000, score: 93, cumulative reward: -109.26015097165967
Episode: 2388/1000, score: 152, cumulative reward: 42.00044300877494
Episode: 2389/1000, score: 113, cumulative reward: -137.32919456482745
Episode: 2390/1000, score: 74, cumulative reward: -77.62434471194285
Episode: 2391/1000, score: 75, cumulative reward: -168.41346733365972
Episode: 2392/1000, score: 125, cumulative reward: -160.55965805355945
Episode: 2393/1000, score: 160, cumulative reward: -29.15112510540942
Episode: 2394/1000, score: 157, cumulative reward: -115.3595339968642
Episode: 2395/1000, score: 195, cumulative reward: -129.89798588791203
Episode: 2396/1000, score: 143, cumulative reward: -126.87352116154926
Episode: 2397/1000, score: 360, cumulative reward: -295.57389047772386
Episode: 2398/1000, score: 66, cumulative reward: -155.0135578470455
Episode: 2399/1000, score: 77, cumulative reward: -78.62200071593104
Episode: 2400/1000, score: 71, cumulative reward: -178.5407733406305
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2401/1000, score: 66, cumulative reward: -259.6456625677987
Episode: 2402/1000, score: 224, cumulative reward: -226.81188104813458
Episode: 2403/1000, score: 117, cumulative reward: 25.31364596720951
Episode: 2404/1000, score: 300, cumulative reward: -178.6214430157832
Episode: 2405/1000, score: 154, cumulative reward: -141.76092317303682
Episode: 2406/1000, score: 245, cumulative reward: -125.94289203989767
Episode: 2407/1000, score: 110, cumulative reward: -7.36767073403135
Episode: 2408/1000, score: 144, cumulative reward: -337.8666716838818
Episode: 2409/1000, score: 174, cumulative reward: -103.03924116463259
Episode: 2410/1000, score: 123, cumulative reward: -176.06085743867516
Episode: 2411/1000, score: 91, cumulative reward: -103.30875979153785
Episode: 2412/1000, score: 305, cumulative reward: -239.76248298291364
Episode: 2413/1000, score: 169, cumulative reward: -464.47467222692967
Episode: 2414/1000, score: 371, cumulative reward: -398.54665773159
Episode: 2415/1000, score: 465, cumulative reward: -242.46748063220676
Episode: 2416/1000, score: 176, cumulative reward: -167.26409019198564
Episode: 2417/1000, score: 90, cumulative reward: -203.77010206313932
Episode: 2418/1000, score: 159, cumulative reward: -116.70257245674007
Episode: 2419/1000, score: 91, cumulative reward: -246.48550871036633
Episode: 2420/1000, score: 424, cumulative reward: -184.06999178409973
Episode: 2421/1000, score: 437, cumulative reward: -244.30363331980666
Episode: 2422/1000, score: 99, cumulative reward: -265.07178433660187
Episode: 2423/1000, score: 94, cumulative reward: -379.09718557706884
Episode: 2424/1000, score: 480, cumulative reward: -29.425263025019092
Episode: 2425/1000, score: 109, cumulative reward: -479.0431670019379
Episode: 2426/1000, score: 224, cumulative reward: -31.18558237001973
Episode: 2427/1000, score: 985, cumulative reward: -411.7039592367324
Episode: 2428/1000, score: 187, cumulative reward: -299.30901168743895
Episode: 2429/1000, score: 487, cumulative reward: -140.90647061038487
Episode: 2430/1000, score: 581, cumulative reward: -278.44126122055275
Episode: 2431/1000, score: 203, cumulative reward: -280.67847390928085
Episode: 2432/1000, score: 302, cumulative reward: -39.35911476071725
Episode: 2433/1000, score: 615, cumulative reward: -232.02807541905534
Episode: 2434/1000, score: 364, cumulative reward: -18.98493487209025
Episode: 2435/1000, score: 152, cumulative reward: -239.1136999157065
Episode: 2436/1000, score: 102, cumulative reward: -58.18805668107277
Episode: 2437/1000, score: 112, cumulative reward: -83.29482477142477
Episode: 2438/1000, score: 203, cumulative reward: -56.30591924996388
Episode: 2439/1000, score: 394, cumulative reward: -142.48400084745145
Episode: 2440/1000, score: 151, cumulative reward: -255.53732994959427
Episode: 2441/1000, score: 181, cumulative reward: -137.23418948609518
Episode: 2442/1000, score: 249, cumulative reward: -21.008784370369142
Episode: 2443/1000, score: 86, cumulative reward: -293.9516902288668
Episode: 2444/1000, score: 77, cumulative reward: -217.18697466248702
Episode: 2445/1000, score: 148, cumulative reward: -256.92258387473635
Episode: 2446/1000, score: 193, cumulative reward: -3.372148321928293
Episode: 2447/1000, score: 186, cumulative reward: -332.62660841420893
Episode: 2448/1000, score: 101, cumulative reward: -230.1780951874082
Episode: 2449/1000, score: 364, cumulative reward: -932.3912787419358
Episode: 2450/1000, score: 106, cumulative reward: -66.87401027441553
Episode: 2451/1000, score: 92, cumulative reward: -223.5483018704544
Episode: 2452/1000, score: 146, cumulative reward: -522.3576363205723
Episode: 2453/1000, score: 137, cumulative reward: -117.15819507316934
Episode: 2454/1000, score: 79, cumulative reward: -155.55053311116234
Episode: 2455/1000, score: 124, cumulative reward: -28.558262948742083
Episode: 2456/1000, score: 208, cumulative reward: -261.86252595753143
Episode: 2457/1000, score: 370, cumulative reward: -66.1103829998734
Episode: 2458/1000, score: 72, cumulative reward: -140.74391671413593
Episode: 2459/1000, score: 229, cumulative reward: -228.97525832818462
Episode: 2460/1000, score: 650, cumulative reward: -392.180875245112
Episode: 2461/1000, score: 464, cumulative reward: -262.6176060467925
Episode: 2462/1000, score: 169, cumulative reward: -328.7155148974807
Episode: 2463/1000, score: 245, cumulative reward: -33.589206369850714
Episode: 2464/1000, score: 248, cumulative reward: -205.51619646552206
Episode: 2465/1000, score: 232, cumulative reward: -481.23632513268166
Episode: 2466/1000, score: 118, cumulative reward: -98.44719297484815
Episode: 2467/1000, score: 323, cumulative reward: -427.4740767313781
Episode: 2468/1000, score: 202, cumulative reward: -514.7663852468268
Episode: 2469/1000, score: 333, cumulative reward: -329.22095704442404
Episode: 2470/1000, score: 229, cumulative reward: -221.79403116716927
Episode: 2471/1000, score: 247, cumulative reward: -499.51050994653883
Episode: 2472/1000, score: 114, cumulative reward: -191.8245747927864
Episode: 2473/1000, score: 190, cumulative reward: -24.805660072431124
Episode: 2474/1000, score: 136, cumulative reward: -454.55979823077985
Episode: 2475/1000, score: 140, cumulative reward: -325.0541464326419
Episode: 2476/1000, score: 189, cumulative reward: -57.37703077148435
Episode: 2477/1000, score: 115, cumulative reward: -120.78196728573434
Episode: 2478/1000, score: 79, cumulative reward: -15.209635949671778
Episode: 2479/1000, score: 85, cumulative reward: -431.5974182696625
Episode: 2480/1000, score: 95, cumulative reward: -552.9836441913033
Episode: 2481/1000, score: 191, cumulative reward: -103.35639079479616
Episode: 2482/1000, score: 88, cumulative reward: -514.1003441797363
Episode: 2483/1000, score: 69, cumulative reward: -38.30980185971167
Episode: 2484/1000, score: 60, cumulative reward: -400.7477783315438
Episode: 2485/1000, score: 66, cumulative reward: -324.5073744285746
Episode: 2486/1000, score: 50, cumulative reward: -481.19911298690585
Episode: 2487/1000, score: 78, cumulative reward: -597.253432284999
Episode: 2488/1000, score: 57, cumulative reward: -481.81612078464445
Episode: 2489/1000, score: 70, cumulative reward: -450.27866554138524
Episode: 2490/1000, score: 64, cumulative reward: -647.0779228206719
Episode: 2491/1000, score: 107, cumulative reward: -639.47490432341
Episode: 2492/1000, score: 164, cumulative reward: -262.549571734151
Episode: 2493/1000, score: 250, cumulative reward: -358.4460241631073
Episode: 2494/1000, score: 274, cumulative reward: -43.00244505971165
Episode: 2495/1000, score: 115, cumulative reward: -108.09479095066592
Episode: 2496/1000, score: 148, cumulative reward: -197.91822204920885
Episode: 2497/1000, score: 126, cumulative reward: -164.29388711143417
Episode: 2498/1000, score: 100, cumulative reward: -327.9079476991515
Episode: 2499/1000, score: 139, cumulative reward: -352.8786138785839
Episode: 2500/1000, score: 113, cumulative reward: -325.2886669707776
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2501/1000, score: 116, cumulative reward: -237.98231636516513
Episode: 2502/1000, score: 151, cumulative reward: -337.3615924221564
Episode: 2503/1000, score: 200, cumulative reward: -271.0363535796044
Episode: 2504/1000, score: 189, cumulative reward: -499.2118545331899
Episode: 2505/1000, score: 165, cumulative reward: -141.8962858039367
Episode: 2506/1000, score: 98, cumulative reward: -541.4111862676158
Episode: 2507/1000, score: 191, cumulative reward: -141.93985687850318
Episode: 2508/1000, score: 150, cumulative reward: -435.7174155308778
Episode: 2509/1000, score: 121, cumulative reward: -388.0517340025401
Episode: 2510/1000, score: 232, cumulative reward: -186.33975925195654
Episode: 2511/1000, score: 126, cumulative reward: -162.02672870770195
Episode: 2512/1000, score: 140, cumulative reward: -311.67011646052674
Episode: 2513/1000, score: 195, cumulative reward: -270.4117159013903
Episode: 2514/1000, score: 148, cumulative reward: -422.64355398354127
Episode: 2515/1000, score: 120, cumulative reward: -361.693302977818
Episode: 2516/1000, score: 89, cumulative reward: -461.1801648166576
Episode: 2517/1000, score: 174, cumulative reward: -529.7950048607797
Episode: 2518/1000, score: 90, cumulative reward: -422.40649816001746
Episode: 2519/1000, score: 209, cumulative reward: -391.488515597344
Episode: 2520/1000, score: 96, cumulative reward: -520.638895027912
Episode: 2521/1000, score: 117, cumulative reward: -471.0267454654285
Episode: 2522/1000, score: 85, cumulative reward: -690.8829760881517
Episode: 2523/1000, score: 77, cumulative reward: -473.37811400907657
Episode: 2524/1000, score: 78, cumulative reward: -719.7310555912735
Episode: 2525/1000, score: 59, cumulative reward: -411.7696089648526
Episode: 2526/1000, score: 65, cumulative reward: -517.6575715992124
Episode: 2527/1000, score: 58, cumulative reward: -511.8813292889542
Episode: 2528/1000, score: 62, cumulative reward: -523.4036248052057
Episode: 2529/1000, score: 72, cumulative reward: -548.4975353279441
Episode: 2530/1000, score: 93, cumulative reward: -579.2638396375614
Episode: 2531/1000, score: 58, cumulative reward: -522.8541728377564
Episode: 2532/1000, score: 72, cumulative reward: -692.6193533540713
Episode: 2533/1000, score: 56, cumulative reward: -359.1360159155448
Episode: 2534/1000, score: 57, cumulative reward: -510.349561349044
Episode: 2535/1000, score: 53, cumulative reward: -406.216993837097
Episode: 2536/1000, score: 66, cumulative reward: -696.9628057837704
Episode: 2537/1000, score: 58, cumulative reward: -519.3354189092349
Episode: 2538/1000, score: 59, cumulative reward: -304.3235458587028
Episode: 2539/1000, score: 67, cumulative reward: -393.288127490292
Episode: 2540/1000, score: 111, cumulative reward: -397.88215231284795
Episode: 2541/1000, score: 104, cumulative reward: -463.7445115761935
Episode: 2542/1000, score: 63, cumulative reward: -626.131123718645
Episode: 2543/1000, score: 107, cumulative reward: -380.92783315034336
Episode: 2544/1000, score: 77, cumulative reward: -427.8675075822645
Episode: 2545/1000, score: 139, cumulative reward: -489.7989012810698
Episode: 2546/1000, score: 81, cumulative reward: -242.25815276234732
Episode: 2547/1000, score: 133, cumulative reward: -644.687112795466
Episode: 2548/1000, score: 87, cumulative reward: -456.19539791584367
Episode: 2549/1000, score: 168, cumulative reward: -637.3595880323351
Episode: 2550/1000, score: 81, cumulative reward: -450.5158208140017
Episode: 2551/1000, score: 92, cumulative reward: -621.0181012555504
Episode: 2552/1000, score: 84, cumulative reward: -503.8989503577797
Episode: 2553/1000, score: 90, cumulative reward: -578.4750214072056
Episode: 2554/1000, score: 62, cumulative reward: -94.40871740874047
Episode: 2555/1000, score: 84, cumulative reward: -379.87760056146925
Episode: 2556/1000, score: 91, cumulative reward: -182.7068189923961
Episode: 2557/1000, score: 57, cumulative reward: -89.57675277620179
Episode: 2558/1000, score: 153, cumulative reward: -281.9818841809971
Episode: 2559/1000, score: 210, cumulative reward: -751.0509231568236
Episode: 2560/1000, score: 128, cumulative reward: -267.1231978638279
Episode: 2561/1000, score: 50, cumulative reward: -458.7692973386977
Episode: 2562/1000, score: 91, cumulative reward: -323.41544604427867
Episode: 2563/1000, score: 93, cumulative reward: -240.08407339693846
Episode: 2564/1000, score: 108, cumulative reward: -278.67078350411487
Episode: 2565/1000, score: 83, cumulative reward: -146.61153103573164
Episode: 2566/1000, score: 126, cumulative reward: -263.77254220748944
Episode: 2567/1000, score: 77, cumulative reward: -116.09676364123911
Episode: 2568/1000, score: 85, cumulative reward: -107.66348449838965
Episode: 2569/1000, score: 64, cumulative reward: -614.9990945707698
Episode: 2570/1000, score: 153, cumulative reward: -342.25366606666586
Episode: 2571/1000, score: 77, cumulative reward: -125.32104831030163
Episode: 2572/1000, score: 87, cumulative reward: -321.6313285875723
Episode: 2573/1000, score: 101, cumulative reward: -360.94544712820556
Episode: 2574/1000, score: 86, cumulative reward: -519.9625784606737
Episode: 2575/1000, score: 114, cumulative reward: -424.40992789962377
Episode: 2576/1000, score: 94, cumulative reward: -557.7947236712057
Episode: 2577/1000, score: 176, cumulative reward: -142.04130921370427
Episode: 2578/1000, score: 89, cumulative reward: -250.13271039501157
Episode: 2579/1000, score: 68, cumulative reward: -588.8505199666422
Episode: 2580/1000, score: 190, cumulative reward: -102.93784333062585
Episode: 2581/1000, score: 96, cumulative reward: -189.12082416966894
Episode: 2582/1000, score: 92, cumulative reward: -96.15904294881248
Episode: 2583/1000, score: 104, cumulative reward: -168.46344104302324
Episode: 2584/1000, score: 225, cumulative reward: -509.6473552480821
Episode: 2585/1000, score: 217, cumulative reward: -439.80719270475146
Episode: 2586/1000, score: 125, cumulative reward: -18.720249336294913
Episode: 2587/1000, score: 126, cumulative reward: -284.0059928609759
Episode: 2588/1000, score: 77, cumulative reward: -643.7320020973077
Episode: 2589/1000, score: 129, cumulative reward: -325.1726757504323
Episode: 2590/1000, score: 81, cumulative reward: -395.72669287521603
Episode: 2591/1000, score: 54, cumulative reward: -138.62794981588706
Episode: 2592/1000, score: 54, cumulative reward: -170.8788665372054
Episode: 2593/1000, score: 80, cumulative reward: -383.64643834745016
Episode: 2594/1000, score: 53, cumulative reward: -142.26592866415763
Episode: 2595/1000, score: 55, cumulative reward: -138.8484122279432
Episode: 2596/1000, score: 96, cumulative reward: -170.02405134094266
Episode: 2597/1000, score: 84, cumulative reward: -255.86705304617314
Episode: 2598/1000, score: 91, cumulative reward: -238.4573145977971
Episode: 2599/1000, score: 88, cumulative reward: -107.76058468102461
Episode: 2600/1000, score: 97, cumulative reward: -369.9453950289911
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2601/1000, score: 127, cumulative reward: -577.7507398856212
Episode: 2602/1000, score: 82, cumulative reward: -466.1488614813509
Episode: 2603/1000, score: 54, cumulative reward: -105.5072594807715
Episode: 2604/1000, score: 271, cumulative reward: -292.5470339347705
Episode: 2605/1000, score: 126, cumulative reward: -285.93942578945456
Episode: 2606/1000, score: 205, cumulative reward: -232.83757444739237
Episode: 2607/1000, score: 157, cumulative reward: -216.31491866983833
Episode: 2608/1000, score: 97, cumulative reward: -285.65174209493557
Episode: 2609/1000, score: 94, cumulative reward: -390.7028920790644
Episode: 2610/1000, score: 81, cumulative reward: -261.86244173080513
Episode: 2611/1000, score: 105, cumulative reward: -238.62538945700683
Episode: 2612/1000, score: 116, cumulative reward: -418.7481626108638
Episode: 2613/1000, score: 55, cumulative reward: -257.94960680092186
Episode: 2614/1000, score: 101, cumulative reward: -410.13390640492145
Episode: 2615/1000, score: 78, cumulative reward: -387.14168635533827
Episode: 2616/1000, score: 114, cumulative reward: -259.1535294050225
Episode: 2617/1000, score: 202, cumulative reward: -643.312535270694
Episode: 2618/1000, score: 68, cumulative reward: -320.0907513618373
Episode: 2619/1000, score: 85, cumulative reward: -287.9183192717488
Episode: 2620/1000, score: 185, cumulative reward: -384.15297044045417
Episode: 2621/1000, score: 85, cumulative reward: -490.74756037499833
Episode: 2622/1000, score: 252, cumulative reward: -406.7863107741824
Episode: 2623/1000, score: 116, cumulative reward: -147.0491442090229
Episode: 2624/1000, score: 145, cumulative reward: -389.7980268552892
Episode: 2625/1000, score: 159, cumulative reward: -331.6531052867058
Episode: 2626/1000, score: 59, cumulative reward: -71.43640758676155
Episode: 2627/1000, score: 71, cumulative reward: -300.4191679105329
Episode: 2628/1000, score: 70, cumulative reward: -420.5263260172858
Episode: 2629/1000, score: 77, cumulative reward: -406.747198772763
Episode: 2630/1000, score: 89, cumulative reward: -366.3003416948636
Episode: 2631/1000, score: 87, cumulative reward: -141.91276786617814
Episode: 2632/1000, score: 80, cumulative reward: -293.3706449485534
Episode: 2633/1000, score: 81, cumulative reward: -600.0311562710519
Episode: 2634/1000, score: 76, cumulative reward: -409.4156902400288
Episode: 2635/1000, score: 79, cumulative reward: -409.0068507770622
Episode: 2636/1000, score: 69, cumulative reward: -468.50665549881217
Episode: 2637/1000, score: 69, cumulative reward: -585.6174307497449
Episode: 2638/1000, score: 67, cumulative reward: -104.11602204140831
Episode: 2639/1000, score: 67, cumulative reward: -107.97582678422748
Episode: 2640/1000, score: 77, cumulative reward: -547.3219617663724
Episode: 2641/1000, score: 63, cumulative reward: -229.02815095177905
Episode: 2642/1000, score: 109, cumulative reward: -94.190883075504
Episode: 2643/1000, score: 71, cumulative reward: -371.7633552565412
Episode: 2644/1000, score: 185, cumulative reward: -369.54696268905946
Episode: 2645/1000, score: 76, cumulative reward: -557.9049609042254
Episode: 2646/1000, score: 67, cumulative reward: -115.49933466784384
Episode: 2647/1000, score: 51, cumulative reward: -123.0948482753351
Episode: 2648/1000, score: 103, cumulative reward: -422.2166993551686
Episode: 2649/1000, score: 119, cumulative reward: -492.23857687944957
Episode: 2650/1000, score: 97, cumulative reward: -493.30834434673557
Episode: 2651/1000, score: 78, cumulative reward: -523.1057811479591
Episode: 2652/1000, score: 74, cumulative reward: -332.81781571607064
Episode: 2653/1000, score: 76, cumulative reward: -411.20427352188995
Episode: 2654/1000, score: 162, cumulative reward: -541.9922843446507
Episode: 2655/1000, score: 89, cumulative reward: -303.2333694004333
Episode: 2656/1000, score: 178, cumulative reward: -447.73395131485313
Episode: 2657/1000, score: 76, cumulative reward: -488.66373598863163
Episode: 2658/1000, score: 70, cumulative reward: -595.5638491998482
Episode: 2659/1000, score: 231, cumulative reward: -555.1365207674094
Episode: 2660/1000, score: 95, cumulative reward: -378.57989779551644
Episode: 2661/1000, score: 116, cumulative reward: -397.7479476308139
Episode: 2662/1000, score: 151, cumulative reward: -529.9748567178203
Episode: 2663/1000, score: 136, cumulative reward: -578.0630317737075
Episode: 2664/1000, score: 223, cumulative reward: -693.1838341327717
Episode: 2665/1000, score: 98, cumulative reward: -342.7185579562115
Episode: 2666/1000, score: 115, cumulative reward: -439.20305783227013
Episode: 2667/1000, score: 168, cumulative reward: -366.67358891231777
Episode: 2668/1000, score: 99, cumulative reward: -508.403447208964
Episode: 2669/1000, score: 180, cumulative reward: -737.052351589546
Episode: 2670/1000, score: 70, cumulative reward: -627.3909741300565
Episode: 2671/1000, score: 123, cumulative reward: -670.7878420259764
Episode: 2672/1000, score: 134, cumulative reward: -533.2575731401114
Episode: 2673/1000, score: 123, cumulative reward: -736.2026332831238
Episode: 2674/1000, score: 64, cumulative reward: -213.3209620997992
Episode: 2675/1000, score: 67, cumulative reward: -609.8340771607353
Episode: 2676/1000, score: 72, cumulative reward: -586.663885846802
Episode: 2677/1000, score: 110, cumulative reward: -401.22578186353854
Episode: 2678/1000, score: 74, cumulative reward: -689.2277850589593
Episode: 2679/1000, score: 172, cumulative reward: -724.2879416044136
Episode: 2680/1000, score: 74, cumulative reward: -559.3908757589122
Episode: 2681/1000, score: 61, cumulative reward: -643.927789163642
Episode: 2682/1000, score: 83, cumulative reward: -656.6387302158678
Episode: 2683/1000, score: 75, cumulative reward: -616.5235923623696
Episode: 2684/1000, score: 104, cumulative reward: -342.8405850197587
Episode: 2685/1000, score: 72, cumulative reward: -475.01203439692455
Episode: 2686/1000, score: 131, cumulative reward: -565.7007020097667
Episode: 2687/1000, score: 81, cumulative reward: -511.8960315115575
Episode: 2688/1000, score: 92, cumulative reward: -255.025436041074
Episode: 2689/1000, score: 188, cumulative reward: -337.6696181500572
Episode: 2690/1000, score: 96, cumulative reward: -475.53945410285735
Episode: 2691/1000, score: 91, cumulative reward: -445.432224912538
Episode: 2692/1000, score: 85, cumulative reward: -220.74920316425062
Episode: 2693/1000, score: 115, cumulative reward: -240.658728189806
Episode: 2694/1000, score: 140, cumulative reward: -628.3130036518238
Episode: 2695/1000, score: 98, cumulative reward: -504.0196703577262
Episode: 2696/1000, score: 231, cumulative reward: -548.7629977958218
Episode: 2697/1000, score: 188, cumulative reward: -513.7880905797699
Episode: 2698/1000, score: 96, cumulative reward: -501.2824957940486
Episode: 2699/1000, score: 146, cumulative reward: -651.0739895938525
Episode: 2700/1000, score: 52, cumulative reward: -392.5298060264253
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2701/1000, score: 204, cumulative reward: -514.1931669351613
Episode: 2702/1000, score: 108, cumulative reward: -499.18201292693334
Episode: 2703/1000, score: 106, cumulative reward: -281.6856153762807
Episode: 2704/1000, score: 147, cumulative reward: -244.4972263847647
Episode: 2705/1000, score: 198, cumulative reward: -352.83735703103383
Episode: 2706/1000, score: 88, cumulative reward: -198.48429619716512
Episode: 2707/1000, score: 121, cumulative reward: -165.27630155487034
Episode: 2708/1000, score: 87, cumulative reward: -42.32709138155769
Episode: 2709/1000, score: 106, cumulative reward: -356.1775278099558
Episode: 2710/1000, score: 228, cumulative reward: -624.7164947150973
Episode: 2711/1000, score: 169, cumulative reward: -314.7870061431771
Episode: 2712/1000, score: 220, cumulative reward: -233.58976446614932
Episode: 2713/1000, score: 84, cumulative reward: -249.88276082801866
Episode: 2714/1000, score: 77, cumulative reward: -498.06172652760415
Episode: 2715/1000, score: 88, cumulative reward: -581.5325687943639
Episode: 2716/1000, score: 84, cumulative reward: -518.9300995773646
Episode: 2717/1000, score: 106, cumulative reward: -423.524468540969
Episode: 2718/1000, score: 154, cumulative reward: -370.0700369093048
Episode: 2719/1000, score: 133, cumulative reward: -362.42170336361903
Episode: 2720/1000, score: 85, cumulative reward: -366.5000103080828
Episode: 2721/1000, score: 137, cumulative reward: -314.15505920764804
Episode: 2722/1000, score: 107, cumulative reward: -323.4977219701398
Episode: 2723/1000, score: 69, cumulative reward: -277.937696617724
Episode: 2724/1000, score: 124, cumulative reward: -413.26834840932827
Episode: 2725/1000, score: 122, cumulative reward: -306.2604819026408
Episode: 2726/1000, score: 155, cumulative reward: -294.24359966739223
Episode: 2727/1000, score: 156, cumulative reward: -638.032582841041
Episode: 2728/1000, score: 135, cumulative reward: -299.6029380464065
Episode: 2729/1000, score: 103, cumulative reward: -344.10853459856924
Episode: 2730/1000, score: 184, cumulative reward: -443.7327111123186
Episode: 2731/1000, score: 154, cumulative reward: -654.1557488902727
Episode: 2732/1000, score: 93, cumulative reward: -458.12282031273725
Episode: 2733/1000, score: 96, cumulative reward: -493.57839941129157
Episode: 2734/1000, score: 95, cumulative reward: -539.8392612844292
Episode: 2735/1000, score: 87, cumulative reward: -577.5316654746491
Episode: 2736/1000, score: 82, cumulative reward: -618.8045297852923
Episode: 2737/1000, score: 85, cumulative reward: -557.7783394708215
Episode: 2738/1000, score: 85, cumulative reward: -292.4729978324492
Episode: 2739/1000, score: 84, cumulative reward: -352.0412831862252
Episode: 2740/1000, score: 143, cumulative reward: -343.5064054136958
Episode: 2741/1000, score: 99, cumulative reward: -408.05071887536815
Episode: 2742/1000, score: 137, cumulative reward: -355.35817508612683
Episode: 2743/1000, score: 189, cumulative reward: -527.0932502658084
Episode: 2744/1000, score: 104, cumulative reward: -413.99890391979443
Episode: 2745/1000, score: 122, cumulative reward: -432.24804863480415
Episode: 2746/1000, score: 122, cumulative reward: -249.15976286840618
Episode: 2747/1000, score: 202, cumulative reward: -399.20133305720327
Episode: 2748/1000, score: 199, cumulative reward: -289.916871558093
Episode: 2749/1000, score: 126, cumulative reward: -363.4128053823367
Episode: 2750/1000, score: 82, cumulative reward: -508.93365456897106
Episode: 2751/1000, score: 74, cumulative reward: -469.70220917103626
Episode: 2752/1000, score: 56, cumulative reward: -367.4384931632762
Episode: 2753/1000, score: 127, cumulative reward: -530.6025991473992
Episode: 2754/1000, score: 223, cumulative reward: -157.08882961127162
Episode: 2755/1000, score: 95, cumulative reward: -428.15304000361806
Episode: 2756/1000, score: 62, cumulative reward: -312.19140931758426
Episode: 2757/1000, score: 95, cumulative reward: -243.64744164422217
Episode: 2758/1000, score: 141, cumulative reward: -253.25637617257317
Episode: 2759/1000, score: 84, cumulative reward: -502.9884053297684
Episode: 2760/1000, score: 99, cumulative reward: -500.819904577698
Episode: 2761/1000, score: 93, cumulative reward: -279.55105484777835
Episode: 2762/1000, score: 197, cumulative reward: -192.68359558137178
Episode: 2763/1000, score: 132, cumulative reward: -316.9654229632566
Episode: 2764/1000, score: 102, cumulative reward: -267.6867018566261
Episode: 2765/1000, score: 154, cumulative reward: -351.9870711862054
Episode: 2766/1000, score: 180, cumulative reward: -203.85275679957329
Episode: 2767/1000, score: 92, cumulative reward: -482.2678785299808
Episode: 2768/1000, score: 131, cumulative reward: -332.99166970088174
Episode: 2769/1000, score: 164, cumulative reward: -25.834797345710015
Episode: 2770/1000, score: 96, cumulative reward: -350.60039105701355
Episode: 2771/1000, score: 174, cumulative reward: -233.57262947681525
Episode: 2772/1000, score: 91, cumulative reward: -355.76253294843923
Episode: 2773/1000, score: 96, cumulative reward: -332.53640434653397
Episode: 2774/1000, score: 67, cumulative reward: -384.24881226707504
Episode: 2775/1000, score: 68, cumulative reward: -611.4797424944453
Episode: 2776/1000, score: 87, cumulative reward: -863.7777736135984
Episode: 2777/1000, score: 103, cumulative reward: -81.13146755029581
Episode: 2778/1000, score: 80, cumulative reward: -185.82548438000836
Episode: 2779/1000, score: 179, cumulative reward: -327.5085247008143
Episode: 2780/1000, score: 109, cumulative reward: -248.6393404634256
Episode: 2781/1000, score: 78, cumulative reward: -332.87494639869857
Episode: 2782/1000, score: 83, cumulative reward: -344.03907732144995
Episode: 2783/1000, score: 179, cumulative reward: -233.13280970547498
Episode: 2784/1000, score: 87, cumulative reward: -184.70226698188713
Episode: 2785/1000, score: 87, cumulative reward: -211.82523506838805
Episode: 2786/1000, score: 75, cumulative reward: -297.4138741994527
Episode: 2787/1000, score: 94, cumulative reward: -289.6462211267554
Episode: 2788/1000, score: 87, cumulative reward: -283.24495938872246
Episode: 2789/1000, score: 90, cumulative reward: -221.50939857887892
Episode: 2790/1000, score: 85, cumulative reward: -325.4836080980617
Episode: 2791/1000, score: 103, cumulative reward: -65.92371478923316
Episode: 2792/1000, score: 117, cumulative reward: -285.7929297149936
Episode: 2793/1000, score: 154, cumulative reward: -104.46882801311048
Episode: 2794/1000, score: 129, cumulative reward: -392.2294326924574
Episode: 2795/1000, score: 90, cumulative reward: -398.6003742182849
Episode: 2796/1000, score: 66, cumulative reward: -280.4408120751124
Episode: 2797/1000, score: 247, cumulative reward: -36.64029266193998
Episode: 2798/1000, score: 141, cumulative reward: -77.99168076007543
Episode: 2799/1000, score: 148, cumulative reward: -280.65178998055933
Episode: 2800/1000, score: 161, cumulative reward: -82.82074197581622
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2801/1000, score: 67, cumulative reward: -629.5079193049614
Episode: 2802/1000, score: 91, cumulative reward: -92.6543710521476
Episode: 2803/1000, score: 125, cumulative reward: -368.53103695450795
Episode: 2804/1000, score: 73, cumulative reward: -170.62957444723423
Episode: 2805/1000, score: 57, cumulative reward: -453.3953345824187
Episode: 2806/1000, score: 68, cumulative reward: -232.66166569733394
Episode: 2807/1000, score: 136, cumulative reward: -326.1641912591602
Episode: 2808/1000, score: 66, cumulative reward: -533.5212858779112
Episode: 2809/1000, score: 110, cumulative reward: -340.3828859831492
Episode: 2810/1000, score: 170, cumulative reward: -293.1275231376017
Episode: 2811/1000, score: 104, cumulative reward: -202.2363844802431
Episode: 2812/1000, score: 148, cumulative reward: -255.6274974259521
Episode: 2813/1000, score: 93, cumulative reward: -337.4353564313893
Episode: 2814/1000, score: 182, cumulative reward: -142.58214764227114
Episode: 2815/1000, score: 149, cumulative reward: -168.14164698845732
Episode: 2816/1000, score: 68, cumulative reward: -167.83606628206826
Episode: 2817/1000, score: 130, cumulative reward: -317.22345719682914
Episode: 2818/1000, score: 201, cumulative reward: -262.43870840345573
Episode: 2819/1000, score: 86, cumulative reward: -109.81371424862533
Episode: 2820/1000, score: 94, cumulative reward: -181.7440006995914
Episode: 2821/1000, score: 195, cumulative reward: -11.772549241812229
Episode: 2822/1000, score: 84, cumulative reward: -302.056435025053
Episode: 2823/1000, score: 127, cumulative reward: -57.26813610324011
Episode: 2824/1000, score: 87, cumulative reward: -61.18203645862985
Episode: 2825/1000, score: 109, cumulative reward: -188.2109479193844
Episode: 2826/1000, score: 129, cumulative reward: -110.22590223500872
Episode: 2827/1000, score: 118, cumulative reward: -497.5647796663062
Episode: 2828/1000, score: 97, cumulative reward: -181.75107145150628
Episode: 2829/1000, score: 163, cumulative reward: -193.47206778664213
Episode: 2830/1000, score: 85, cumulative reward: -223.07530774877796
Episode: 2831/1000, score: 348, cumulative reward: -257.1832149524371
Episode: 2832/1000, score: 89, cumulative reward: 51.85600118212142
Episode: 2833/1000, score: 157, cumulative reward: -264.44655035085907
Episode: 2834/1000, score: 87, cumulative reward: -218.00864784092767
Episode: 2835/1000, score: 124, cumulative reward: -495.81354469699625
Episode: 2836/1000, score: 60, cumulative reward: -443.9678937128607
Episode: 2837/1000, score: 259, cumulative reward: -71.69313294184876
Episode: 2838/1000, score: 82, cumulative reward: -390.6607763156129
Episode: 2839/1000, score: 98, cumulative reward: -170.47054411984936
Episode: 2840/1000, score: 106, cumulative reward: -184.3437489237061
Episode: 2841/1000, score: 98, cumulative reward: -70.53140584693712
Episode: 2842/1000, score: 103, cumulative reward: -302.0430561243487
Episode: 2843/1000, score: 336, cumulative reward: -140.05884399857592
Episode: 2845/1000, score: 121, cumulative reward: -164.01587746336082
Episode: 2846/1000, score: 172, cumulative reward: -106.98953222373807
Episode: 2847/1000, score: 714, cumulative reward: 141.9396493572442
Episode: 2848/1000, score: 123, cumulative reward: -178.72761614473006
Episode: 2849/1000, score: 127, cumulative reward: -331.48062461126756
Episode: 2850/1000, score: 112, cumulative reward: -160.56616820999255
Episode: 2851/1000, score: 373, cumulative reward: -235.5354816380444
Episode: 2852/1000, score: 253, cumulative reward: -182.22116297870318
Episode: 2853/1000, score: 76, cumulative reward: -111.22898863138491
Episode: 2854/1000, score: 92, cumulative reward: -271.34542936076605
Episode: 2855/1000, score: 89, cumulative reward: -217.14740481665746
Episode: 2856/1000, score: 85, cumulative reward: -163.80898417971
Episode: 2857/1000, score: 98, cumulative reward: -225.6183631451754
Episode: 2858/1000, score: 178, cumulative reward: -314.56697192004737
Episode: 2859/1000, score: 108, cumulative reward: -390.5709472304268
Episode: 2860/1000, score: 90, cumulative reward: -336.36957053284885
Episode: 2861/1000, score: 236, cumulative reward: -21.018501505427366
Episode: 2862/1000, score: 131, cumulative reward: -276.8305743303955
Episode: 2863/1000, score: 132, cumulative reward: -228.03940383873604
Episode: 2864/1000, score: 142, cumulative reward: -17.55451258000403
Episode: 2865/1000, score: 120, cumulative reward: -129.48250146598355
Episode: 2866/1000, score: 199, cumulative reward: -145.2877086234953
Episode: 2867/1000, score: 109, cumulative reward: -81.1462108564859
Episode: 2868/1000, score: 130, cumulative reward: -194.81343266651692
Episode: 2869/1000, score: 117, cumulative reward: -106.14347378928666
Episode: 2870/1000, score: 114, cumulative reward: -318.0340992034198
Episode: 2871/1000, score: 117, cumulative reward: -138.69474600740259
Episode: 2872/1000, score: 111, cumulative reward: -239.15100119155892
Episode: 2873/1000, score: 169, cumulative reward: -113.76325568216623
Episode: 2874/1000, score: 210, cumulative reward: -277.37720158514037
Episode: 2875/1000, score: 103, cumulative reward: -328.00990310751513
Episode: 2876/1000, score: 54, cumulative reward: -193.19349373263668
Episode: 2877/1000, score: 386, cumulative reward: 175.21297702125844
Episode: 2878/1000, score: 207, cumulative reward: -236.70804689966437
Episode: 2879/1000, score: 125, cumulative reward: -334.0318945685721
Episode: 2880/1000, score: 74, cumulative reward: -507.4426586098776
Episode: 2881/1000, score: 284, cumulative reward: -311.50102250532547
Episode: 2882/1000, score: 113, cumulative reward: -390.82885958574377
Episode: 2883/1000, score: 61, cumulative reward: -448.0617672056378
Episode: 2884/1000, score: 137, cumulative reward: -78.95181667328708
Episode: 2885/1000, score: 124, cumulative reward: -146.15276798484288
Episode: 2886/1000, score: 51, cumulative reward: -327.7912291839031
Episode: 2887/1000, score: 66, cumulative reward: -352.6101855754838
Episode: 2888/1000, score: 165, cumulative reward: -117.60450950786021
Episode: 2889/1000, score: 91, cumulative reward: -46.707400438798686
Episode: 2890/1000, score: 147, cumulative reward: -551.3922226745156
Episode: 2891/1000, score: 94, cumulative reward: -87.38057422663638
Episode: 2892/1000, score: 101, cumulative reward: -474.2973261430298
Episode: 2893/1000, score: 90, cumulative reward: -113.83618355342958
Episode: 2894/1000, score: 85, cumulative reward: -235.7052147993947
Episode: 2895/1000, score: 95, cumulative reward: -35.946003930277556
Episode: 2896/1000, score: 79, cumulative reward: -133.58781887102586
Episode: 2897/1000, score: 108, cumulative reward: -98.05820558075833
Episode: 2898/1000, score: 78, cumulative reward: -402.7732384105978
Episode: 2899/1000, score: 131, cumulative reward: -458.72655345575515
Episode: 2900/1000, score: 61, cumulative reward: -105.37946602948227
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 2901/1000, score: 72, cumulative reward: -137.85244561989117
Episode: 2902/1000, score: 112, cumulative reward: -145.66959775753028
Episode: 2903/1000, score: 126, cumulative reward: -248.59682385160784
Episode: 2904/1000, score: 87, cumulative reward: -312.03734240297854
Episode: 2905/1000, score: 91, cumulative reward: -157.7423428872193
Episode: 2906/1000, score: 260, cumulative reward: -550.9709399275594
Episode: 2907/1000, score: 154, cumulative reward: -605.671384737466
Episode: 2908/1000, score: 146, cumulative reward: -403.74295773297354
Episode: 2909/1000, score: 90, cumulative reward: -268.7214693418682
Episode: 2910/1000, score: 171, cumulative reward: -77.00415141488497
Episode: 2911/1000, score: 240, cumulative reward: -41.41990280335459
Episode: 2912/1000, score: 164, cumulative reward: -246.02887458841937
Episode: 2913/1000, score: 268, cumulative reward: -346.61655012383824
Episode: 2914/1000, score: 222, cumulative reward: -548.5199368382644
Episode: 2915/1000, score: 60, cumulative reward: -437.13319467680833
Episode: 2916/1000, score: 55, cumulative reward: -514.580197215289
Episode: 2917/1000, score: 69, cumulative reward: -311.5106855165782
Episode: 2918/1000, score: 105, cumulative reward: -116.5839068131175
Episode: 2919/1000, score: 560, cumulative reward: -430.5829865955281
Episode: 2920/1000, score: 180, cumulative reward: -278.65789873181063
Episode: 2921/1000, score: 143, cumulative reward: -389.5585753461142
Episode: 2922/1000, score: 121, cumulative reward: -346.7505403322563
Episode: 2923/1000, score: 85, cumulative reward: -1010.8117602847979
Episode: 2924/1000, score: 71, cumulative reward: -457.6431071909841
Episode: 2925/1000, score: 86, cumulative reward: -236.18817530819535
Episode: 2926/1000, score: 123, cumulative reward: -14.082626736424018
Episode: 2927/1000, score: 96, cumulative reward: -284.20004997578053
Episode: 2928/1000, score: 121, cumulative reward: -84.83398657401895
Episode: 2929/1000, score: 179, cumulative reward: -169.83664432781677
Episode: 2930/1000, score: 142, cumulative reward: -82.1790656155132
Episode: 2931/1000, score: 160, cumulative reward: -253.70092208096835
Episode: 2932/1000, score: 210, cumulative reward: -344.8144060963335
Episode: 2933/1000, score: 93, cumulative reward: -309.28185746061024
Episode: 2934/1000, score: 114, cumulative reward: -275.5548479442104
Episode: 2935/1000, score: 219, cumulative reward: -347.8332901807148
Episode: 2936/1000, score: 119, cumulative reward: -242.85192753320854
Episode: 2937/1000, score: 131, cumulative reward: -163.12938185385053
Episode: 2938/1000, score: 111, cumulative reward: -304.3766352583847
Episode: 2939/1000, score: 99, cumulative reward: -104.84903263968043
Episode: 2940/1000, score: 99, cumulative reward: -298.66119633268573
Episode: 2941/1000, score: 313, cumulative reward: -265.1807840957278
Episode: 2942/1000, score: 140, cumulative reward: -447.8720777996196
Episode: 2943/1000, score: 87, cumulative reward: -490.8352868597776
Episode: 2944/1000, score: 75, cumulative reward: -575.3972746188158
Episode: 2945/1000, score: 96, cumulative reward: -274.6762887354979
Episode: 2946/1000, score: 99, cumulative reward: -264.2451021693769
Episode: 2947/1000, score: 296, cumulative reward: -611.3426054694007
Episode: 2948/1000, score: 361, cumulative reward: -198.0531968082028
Episode: 2949/1000, score: 223, cumulative reward: -210.9835356027948
Episode: 2950/1000, score: 155, cumulative reward: -485.643419470996
Episode: 2951/1000, score: 116, cumulative reward: -559.2081109038786
Episode: 2952/1000, score: 73, cumulative reward: -288.68853304484423
Episode: 2953/1000, score: 94, cumulative reward: -112.665040008641
Episode: 2954/1000, score: 98, cumulative reward: -84.54033416047162
Episode: 2955/1000, score: 107, cumulative reward: -411.7639754570788
Episode: 2956/1000, score: 573, cumulative reward: -307.0646628606812
Episode: 2957/1000, score: 98, cumulative reward: -337.15089500140056
Episode: 2958/1000, score: 245, cumulative reward: -431.8930496600185
Episode: 2959/1000, score: 305, cumulative reward: -232.62272495581766
Episode: 2960/1000, score: 155, cumulative reward: -68.19564721450037
Episode: 2961/1000, score: 100, cumulative reward: -279.092708814949
Episode: 2962/1000, score: 505, cumulative reward: -226.31684591667067
Episode: 2963/1000, score: 523, cumulative reward: -167.36634549799885
Episode: 2964/1000, score: 208, cumulative reward: -113.51327664218056
Episode: 2965/1000, score: 326, cumulative reward: -293.5258073605333
Episode: 2966/1000, score: 255, cumulative reward: -252.88448659010197
Episode: 2968/1000, score: 257, cumulative reward: -573.2046626252854
Episode: 2969/1000, score: 450, cumulative reward: -237.94763311310916
Episode: 2970/1000, score: 246, cumulative reward: -305.72401143753393
Episode: 2971/1000, score: 220, cumulative reward: -151.71871332089452
Episode: 2972/1000, score: 102, cumulative reward: -530.2330158926114
Episode: 2973/1000, score: 360, cumulative reward: -197.95730139495612
Episode: 2974/1000, score: 197, cumulative reward: -169.39136384893197
Episode: 2975/1000, score: 259, cumulative reward: -441.7588410656432
Episode: 2976/1000, score: 206, cumulative reward: -246.88215780755866
Episode: 2977/1000, score: 813, cumulative reward: -725.0036767298701
Episode: 2978/1000, score: 330, cumulative reward: -963.4109742211172
Episode: 2979/1000, score: 928, cumulative reward: -516.7697353821875
Episode: 2981/1000, score: 477, cumulative reward: -320.03050029934
Episode: 2982/1000, score: 292, cumulative reward: -346.1101289923916
Episode: 2983/1000, score: 384, cumulative reward: -394.55493226126396
Episode: 2984/1000, score: 472, cumulative reward: -434.6446414903137
Episode: 2985/1000, score: 464, cumulative reward: -390.7198451746871
Episode: 2986/1000, score: 383, cumulative reward: -385.4830612126572
Episode: 2987/1000, score: 537, cumulative reward: -470.5137382179342
Episode: 2988/1000, score: 560, cumulative reward: -380.2870862929472
Episode: 2989/1000, score: 964, cumulative reward: -462.1007446956882
Episode: 2990/1000, score: 595, cumulative reward: -473.49627236489164
Episode: 2991/1000, score: 188, cumulative reward: -782.5853421843924
Episode: 2992/1000, score: 849, cumulative reward: -604.7595534873146
Episode: 2993/1000, score: 748, cumulative reward: -379.23068687640813
Episode: 2994/1000, score: 262, cumulative reward: -241.55280532344307
Episode: 2995/1000, score: 609, cumulative reward: -387.3295220500572
Episode: 2996/1000, score: 123, cumulative reward: -606.027376310838
Episode: 2997/1000, score: 53, cumulative reward: -504.8843371473807
Episode: 2998/1000, score: 69, cumulative reward: -410.2914544020913
Episode: 2999/1000, score: 60, cumulative reward: -139.68861143462868
Episode: 3000/1000, score: 81, cumulative reward: -146.74757091941996
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3001/1000, score: 62, cumulative reward: -415.1632695263564
Episode: 3002/1000, score: 58, cumulative reward: -453.3279935960463
Episode: 3003/1000, score: 65, cumulative reward: -631.801534664541
Episode: 3004/1000, score: 90, cumulative reward: -383.7179874631076
Episode: 3005/1000, score: 87, cumulative reward: -354.33194067149685
Episode: 3006/1000, score: 60, cumulative reward: -278.3034587259565
Episode: 3007/1000, score: 65, cumulative reward: -531.2364322367798
Episode: 3008/1000, score: 61, cumulative reward: -499.91886743095586
Episode: 3009/1000, score: 82, cumulative reward: -472.7406596801483
Episode: 3010/1000, score: 75, cumulative reward: -329.20978551902033
Episode: 3011/1000, score: 55, cumulative reward: -202.61722145880148
Episode: 3012/1000, score: 79, cumulative reward: -247.93442084706408
Episode: 3013/1000, score: 70, cumulative reward: -249.9466015431918
Episode: 3014/1000, score: 81, cumulative reward: -82.54934632399454
Episode: 3015/1000, score: 95, cumulative reward: -90.06035538393846
Episode: 3016/1000, score: 120, cumulative reward: -314.5527511687362
Episode: 3017/1000, score: 88, cumulative reward: -33.53053237021321
Episode: 3018/1000, score: 65, cumulative reward: -422.3209790935239
Episode: 3019/1000, score: 177, cumulative reward: -642.7606341183957
Episode: 3020/1000, score: 182, cumulative reward: -494.4329572515865
Episode: 3021/1000, score: 66, cumulative reward: -494.7043736617734
Episode: 3022/1000, score: 52, cumulative reward: -318.62589446207494
Episode: 3023/1000, score: 111, cumulative reward: -413.2266013728521
Episode: 3024/1000, score: 158, cumulative reward: -235.48867903730422
Episode: 3025/1000, score: 123, cumulative reward: -221.37860663907873
Episode: 3026/1000, score: 101, cumulative reward: -239.0641263448059
Episode: 3027/1000, score: 54, cumulative reward: -380.9848664803667
Episode: 3028/1000, score: 90, cumulative reward: -552.8746649169148
Episode: 3029/1000, score: 97, cumulative reward: -513.2337716339496
Episode: 3030/1000, score: 92, cumulative reward: -324.8966747620193
Episode: 3031/1000, score: 99, cumulative reward: -294.73750417236454
Episode: 3032/1000, score: 80, cumulative reward: -278.7212354369346
Episode: 3033/1000, score: 94, cumulative reward: -410.3444180479541
Episode: 3034/1000, score: 77, cumulative reward: -110.64955987324467
Episode: 3035/1000, score: 68, cumulative reward: -476.5926533475622
Episode: 3036/1000, score: 65, cumulative reward: -311.3742255012111
Episode: 3037/1000, score: 78, cumulative reward: -621.7476542925831
Episode: 3038/1000, score: 82, cumulative reward: -417.12806093622106
Episode: 3039/1000, score: 79, cumulative reward: -273.3549108616474
Episode: 3040/1000, score: 78, cumulative reward: -236.1532877576869
Episode: 3041/1000, score: 58, cumulative reward: -287.6101293768962
Episode: 3042/1000, score: 117, cumulative reward: -513.3176323860558
Episode: 3043/1000, score: 68, cumulative reward: -478.75992193906916
Episode: 3044/1000, score: 76, cumulative reward: -387.82121497128765
Episode: 3045/1000, score: 60, cumulative reward: -561.6407356084492
Episode: 3046/1000, score: 64, cumulative reward: -556.089238530269
Episode: 3047/1000, score: 84, cumulative reward: -280.07034370243025
Episode: 3048/1000, score: 64, cumulative reward: -595.9842122958958
Episode: 3049/1000, score: 69, cumulative reward: -499.4884352380045
Episode: 3050/1000, score: 92, cumulative reward: -489.33265577460776
Episode: 3051/1000, score: 61, cumulative reward: -427.8032483448974
Episode: 3052/1000, score: 73, cumulative reward: -526.9842225484546
Episode: 3053/1000, score: 71, cumulative reward: -434.7716061040707
Episode: 3054/1000, score: 66, cumulative reward: -313.77712025669047
Episode: 3055/1000, score: 82, cumulative reward: -263.6911522446058
Episode: 3056/1000, score: 61, cumulative reward: -145.56610157095815
Episode: 3057/1000, score: 75, cumulative reward: -486.0443052831911
Episode: 3058/1000, score: 78, cumulative reward: -213.70059840008918
Episode: 3059/1000, score: 84, cumulative reward: -392.6548281085775
Episode: 3060/1000, score: 134, cumulative reward: -95.8021741749962
Episode: 3061/1000, score: 57, cumulative reward: -194.53924140794402
Episode: 3062/1000, score: 75, cumulative reward: -133.08130314202367
Episode: 3063/1000, score: 110, cumulative reward: -159.650464185337
Episode: 3064/1000, score: 83, cumulative reward: -402.98633986001823
Episode: 3065/1000, score: 66, cumulative reward: -499.48409091935974
Episode: 3066/1000, score: 63, cumulative reward: -302.48222070548906
Episode: 3067/1000, score: 67, cumulative reward: -227.4576462837744
Episode: 3068/1000, score: 62, cumulative reward: -145.73372870600747
Episode: 3069/1000, score: 89, cumulative reward: -436.05826111162906
Episode: 3070/1000, score: 73, cumulative reward: -549.057278772466
Episode: 3071/1000, score: 66, cumulative reward: -302.8289500013973
Episode: 3072/1000, score: 179, cumulative reward: -206.61147717977838
Episode: 3073/1000, score: 109, cumulative reward: -214.72146956924144
Episode: 3074/1000, score: 87, cumulative reward: -411.94630237782343
Episode: 3075/1000, score: 98, cumulative reward: -93.11324591083587
Episode: 3076/1000, score: 117, cumulative reward: -373.45468043168773
Episode: 3077/1000, score: 64, cumulative reward: -57.2457626227288
Episode: 3078/1000, score: 52, cumulative reward: -225.06783662123644
Episode: 3079/1000, score: 89, cumulative reward: -276.63276928375336
Episode: 3080/1000, score: 117, cumulative reward: -191.0063015032996
Episode: 3081/1000, score: 87, cumulative reward: -307.1085836780859
Episode: 3082/1000, score: 105, cumulative reward: -115.17970397353102
Episode: 3083/1000, score: 55, cumulative reward: -440.54398468977047
Episode: 3084/1000, score: 187, cumulative reward: -195.4115889724588
Episode: 3085/1000, score: 77, cumulative reward: -325.6194156758602
Episode: 3086/1000, score: 141, cumulative reward: -133.25418846748775
Episode: 3087/1000, score: 148, cumulative reward: -45.493985599054284
Episode: 3088/1000, score: 68, cumulative reward: -229.83828018435958
Episode: 3089/1000, score: 98, cumulative reward: -442.2394983149179
Episode: 3090/1000, score: 69, cumulative reward: -435.5348642926937
Episode: 3091/1000, score: 107, cumulative reward: -235.55733260674745
Episode: 3092/1000, score: 77, cumulative reward: -186.41221517196925
Episode: 3093/1000, score: 88, cumulative reward: -341.622202357539
Episode: 3094/1000, score: 73, cumulative reward: -250.9215143053453
Episode: 3095/1000, score: 90, cumulative reward: -226.95683063574762
Episode: 3096/1000, score: 86, cumulative reward: -301.764335398063
Episode: 3097/1000, score: 121, cumulative reward: -149.2882853253022
Episode: 3098/1000, score: 144, cumulative reward: -332.3477234664049
Episode: 3099/1000, score: 121, cumulative reward: -320.3371129408121
Episode: 3100/1000, score: 65, cumulative reward: -289.47137857131577
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3101/1000, score: 74, cumulative reward: -393.903516685012
Episode: 3102/1000, score: 69, cumulative reward: -439.98915088601643
Episode: 3103/1000, score: 86, cumulative reward: -381.955310423809
Episode: 3104/1000, score: 96, cumulative reward: -321.50650964512306
Episode: 3105/1000, score: 73, cumulative reward: -110.86188586920716
Episode: 3106/1000, score: 86, cumulative reward: -374.46730203227406
Episode: 3107/1000, score: 97, cumulative reward: -157.61161181373194
Episode: 3108/1000, score: 105, cumulative reward: -114.75317796720068
Episode: 3109/1000, score: 73, cumulative reward: -468.3833157609035
Episode: 3110/1000, score: 90, cumulative reward: -383.33884946667064
Episode: 3111/1000, score: 73, cumulative reward: -506.3849421143533
Episode: 3112/1000, score: 105, cumulative reward: -266.49040085636
Episode: 3113/1000, score: 168, cumulative reward: -299.38346217110995
Episode: 3114/1000, score: 138, cumulative reward: -566.8297000222801
Episode: 3115/1000, score: 57, cumulative reward: -368.5207249181457
Episode: 3116/1000, score: 130, cumulative reward: -94.46198105021723
Episode: 3117/1000, score: 73, cumulative reward: -545.8548605944086
Episode: 3118/1000, score: 307, cumulative reward: -254.2867250589832
Episode: 3119/1000, score: 109, cumulative reward: -434.4808292156822
Episode: 3120/1000, score: 108, cumulative reward: -383.5145611617146
Episode: 3121/1000, score: 175, cumulative reward: -77.83355728751714
Episode: 3122/1000, score: 236, cumulative reward: -420.2380076038135
Episode: 3123/1000, score: 74, cumulative reward: -569.9204524895218
Episode: 3124/1000, score: 105, cumulative reward: -321.2242528956948
Episode: 3125/1000, score: 88, cumulative reward: -405.4072307412991
Episode: 3126/1000, score: 81, cumulative reward: -501.8762926587505
Episode: 3127/1000, score: 82, cumulative reward: -345.91169109118925
Episode: 3128/1000, score: 61, cumulative reward: -473.2062880330546
Episode: 3129/1000, score: 77, cumulative reward: -539.7836578598026
Episode: 3130/1000, score: 75, cumulative reward: -238.14354283178162
Episode: 3131/1000, score: 94, cumulative reward: -521.4926863125438
Episode: 3132/1000, score: 122, cumulative reward: -421.77896494799245
Episode: 3133/1000, score: 89, cumulative reward: -256.84488304058016
Episode: 3134/1000, score: 106, cumulative reward: -768.9656508033578
Episode: 3135/1000, score: 57, cumulative reward: -484.0355483180701
Episode: 3136/1000, score: 50, cumulative reward: -291.96259942577103
Episode: 3137/1000, score: 67, cumulative reward: -273.19098654860886
Episode: 3138/1000, score: 68, cumulative reward: -120.0097011674905
Episode: 3139/1000, score: 69, cumulative reward: -240.10442366163858
Episode: 3140/1000, score: 90, cumulative reward: -147.4997618796781
Episode: 3141/1000, score: 66, cumulative reward: -550.1492343037505
Episode: 3142/1000, score: 86, cumulative reward: -217.3889532889981
Episode: 3143/1000, score: 71, cumulative reward: -527.2643012349888
Episode: 3144/1000, score: 83, cumulative reward: -735.5364811390921
Episode: 3145/1000, score: 67, cumulative reward: -485.81639089565164
Episode: 3146/1000, score: 55, cumulative reward: -420.43980060736334
Episode: 3147/1000, score: 64, cumulative reward: -246.6280428596698
Episode: 3148/1000, score: 66, cumulative reward: -415.30504377917225
Episode: 3149/1000, score: 65, cumulative reward: -402.8215487048285
Episode: 3150/1000, score: 69, cumulative reward: -449.6644839647943
Episode: 3151/1000, score: 76, cumulative reward: -165.74693385407494
Episode: 3152/1000, score: 84, cumulative reward: -140.77495050987028
Episode: 3153/1000, score: 104, cumulative reward: -502.7251383936518
Episode: 3154/1000, score: 48, cumulative reward: -274.5679569626747
Episode: 3155/1000, score: 65, cumulative reward: -316.8143591589445
Episode: 3156/1000, score: 57, cumulative reward: -35.7456314223075
Episode: 3157/1000, score: 75, cumulative reward: -400.11344331854434
Episode: 3158/1000, score: 95, cumulative reward: -423.14128860688527
Episode: 3159/1000, score: 76, cumulative reward: -207.19878495437032
Episode: 3160/1000, score: 67, cumulative reward: -34.059184461051984
Episode: 3161/1000, score: 88, cumulative reward: -115.46938071265487
Episode: 3162/1000, score: 83, cumulative reward: -321.3887957326713
Episode: 3163/1000, score: 65, cumulative reward: -456.43697465584825
Episode: 3164/1000, score: 74, cumulative reward: -255.77540189488187
Episode: 3165/1000, score: 83, cumulative reward: -515.7900811215186
Episode: 3166/1000, score: 62, cumulative reward: -189.94403709105046
Episode: 3167/1000, score: 60, cumulative reward: -359.11398365229905
Episode: 3168/1000, score: 77, cumulative reward: -453.2062043365282
Episode: 3169/1000, score: 71, cumulative reward: -338.4681941978994
Episode: 3170/1000, score: 88, cumulative reward: -65.13910461957065
Episode: 3171/1000, score: 85, cumulative reward: -127.33761757990523
Episode: 3172/1000, score: 95, cumulative reward: -305.1470552293664
Episode: 3173/1000, score: 70, cumulative reward: -157.56064903179737
Episode: 3174/1000, score: 79, cumulative reward: -179.31153007139937
Episode: 3175/1000, score: 113, cumulative reward: -115.77250153918317
Episode: 3176/1000, score: 73, cumulative reward: -68.94723556050596
Episode: 3177/1000, score: 69, cumulative reward: -227.58878217738646
Episode: 3178/1000, score: 78, cumulative reward: -216.8491158052623
Episode: 3179/1000, score: 82, cumulative reward: -126.1746116541523
Episode: 3180/1000, score: 95, cumulative reward: -40.40297717504513
Episode: 3181/1000, score: 62, cumulative reward: -364.6006166629544
Episode: 3182/1000, score: 69, cumulative reward: -403.5835055467579
Episode: 3183/1000, score: 76, cumulative reward: -101.85659772533879
Episode: 3184/1000, score: 63, cumulative reward: -171.24341864401322
Episode: 3185/1000, score: 52, cumulative reward: -180.99921389994768
Episode: 3186/1000, score: 84, cumulative reward: -152.53826727666805
Episode: 3187/1000, score: 58, cumulative reward: -152.00578899767808
Episode: 3188/1000, score: 76, cumulative reward: -106.8083709628472
Episode: 3189/1000, score: 112, cumulative reward: -141.33914758364853
Episode: 3190/1000, score: 103, cumulative reward: -196.35563327836684
Episode: 3191/1000, score: 70, cumulative reward: -121.81846518127583
Episode: 3192/1000, score: 102, cumulative reward: 13.094559144199835
Episode: 3193/1000, score: 80, cumulative reward: -138.9256157368772
Episode: 3194/1000, score: 104, cumulative reward: -239.65451481390787
Episode: 3195/1000, score: 64, cumulative reward: -132.84192263623777
Episode: 3196/1000, score: 101, cumulative reward: -66.45936545761518
Episode: 3197/1000, score: 243, cumulative reward: -113.57393921323016
Episode: 3198/1000, score: 86, cumulative reward: -123.210516035755
Episode: 3199/1000, score: 103, cumulative reward: -426.5748056149952
Episode: 3200/1000, score: 185, cumulative reward: -22.87584187477117
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3201/1000, score: 102, cumulative reward: -339.3094147346293
Episode: 3202/1000, score: 114, cumulative reward: -392.1366283046403
Episode: 3203/1000, score: 107, cumulative reward: -254.44642653919738
Episode: 3204/1000, score: 305, cumulative reward: -71.46863852229853
Episode: 3205/1000, score: 154, cumulative reward: -208.85031128347032
Episode: 3206/1000, score: 346, cumulative reward: -212.27191555317194
Episode: 3207/1000, score: 163, cumulative reward: -320.4196689492142
Episode: 3208/1000, score: 101, cumulative reward: -57.54494828642122
Episode: 3209/1000, score: 152, cumulative reward: -197.8572344639923
Episode: 3210/1000, score: 89, cumulative reward: -120.98895053740439
Episode: 3211/1000, score: 99, cumulative reward: -519.1279332017059
Episode: 3212/1000, score: 249, cumulative reward: -268.56263814441456
Episode: 3213/1000, score: 147, cumulative reward: -433.57031465167034
Episode: 3214/1000, score: 163, cumulative reward: -552.9904214707016
Episode: 3215/1000, score: 138, cumulative reward: -498.73641833554905
Episode: 3216/1000, score: 68, cumulative reward: -175.2303420855909
Episode: 3217/1000, score: 74, cumulative reward: -540.2785704271238
Episode: 3218/1000, score: 135, cumulative reward: -208.1741415050426
Episode: 3219/1000, score: 56, cumulative reward: -495.6673510589123
Episode: 3220/1000, score: 99, cumulative reward: -185.19106330966184
Episode: 3221/1000, score: 88, cumulative reward: -143.9095836921124
Episode: 3222/1000, score: 137, cumulative reward: -19.049353193957316
Episode: 3223/1000, score: 83, cumulative reward: -300.15619824410317
Episode: 3224/1000, score: 106, cumulative reward: -241.87562058474904
Episode: 3225/1000, score: 109, cumulative reward: -56.89629876247528
Episode: 3226/1000, score: 69, cumulative reward: -444.4023021516079
Episode: 3227/1000, score: 127, cumulative reward: -192.4451084193975
Episode: 3228/1000, score: 116, cumulative reward: -53.755894546163475
Episode: 3229/1000, score: 77, cumulative reward: -534.9601773384445
Episode: 3230/1000, score: 116, cumulative reward: -248.36087528156614
Episode: 3231/1000, score: 141, cumulative reward: -411.3340197204377
Episode: 3232/1000, score: 84, cumulative reward: -410.9381312465589
Episode: 3233/1000, score: 110, cumulative reward: -562.147097674214
Episode: 3234/1000, score: 90, cumulative reward: -230.18072952539956
Episode: 3235/1000, score: 62, cumulative reward: -565.3151890672777
Episode: 3236/1000, score: 111, cumulative reward: -578.7860082333481
Episode: 3237/1000, score: 80, cumulative reward: -512.3673433050818
Episode: 3238/1000, score: 68, cumulative reward: -435.907296436453
Episode: 3239/1000, score: 53, cumulative reward: -461.0567226687488
Episode: 3240/1000, score: 62, cumulative reward: -193.5085582128667
Episode: 3241/1000, score: 78, cumulative reward: -348.3980633919359
Episode: 3242/1000, score: 58, cumulative reward: -191.32632567993443
Episode: 3243/1000, score: 125, cumulative reward: -597.9586394969806
Episode: 3244/1000, score: 105, cumulative reward: -564.8254768812246
Episode: 3245/1000, score: 82, cumulative reward: -162.73908842131357
Episode: 3246/1000, score: 131, cumulative reward: -426.1162820276443
Episode: 3247/1000, score: 50, cumulative reward: -286.89453000867036
Episode: 3248/1000, score: 53, cumulative reward: -133.65584450139798
Episode: 3249/1000, score: 59, cumulative reward: -131.51306365686756
Episode: 3250/1000, score: 68, cumulative reward: -483.64338164537133
Episode: 3251/1000, score: 128, cumulative reward: -118.82510783310704
Episode: 3252/1000, score: 90, cumulative reward: -348.27712683591125
Episode: 3253/1000, score: 118, cumulative reward: -690.5979214786594
Episode: 3254/1000, score: 100, cumulative reward: -67.43145341256184
Episode: 3255/1000, score: 83, cumulative reward: -135.42009900520821
Episode: 3256/1000, score: 72, cumulative reward: -371.63251801060477
Episode: 3257/1000, score: 92, cumulative reward: -316.3272387580972
Episode: 3258/1000, score: 107, cumulative reward: -382.1834676866969
Episode: 3259/1000, score: 92, cumulative reward: -402.55147884646783
Episode: 3260/1000, score: 92, cumulative reward: -411.36172188120304
Episode: 3261/1000, score: 103, cumulative reward: -104.07048510629915
Episode: 3262/1000, score: 81, cumulative reward: -511.96323486036226
Episode: 3263/1000, score: 62, cumulative reward: -146.92509598166072
Episode: 3264/1000, score: 120, cumulative reward: -339.6868609746758
Episode: 3265/1000, score: 92, cumulative reward: -141.51250744639754
Episode: 3266/1000, score: 81, cumulative reward: -260.04651559394557
Episode: 3267/1000, score: 106, cumulative reward: -182.14336446289093
Episode: 3268/1000, score: 86, cumulative reward: -360.31746003908347
Episode: 3269/1000, score: 102, cumulative reward: -219.73446804828959
Episode: 3270/1000, score: 73, cumulative reward: -412.89050563606855
Episode: 3271/1000, score: 121, cumulative reward: -370.62692701830235
Episode: 3272/1000, score: 134, cumulative reward: -381.2209840323886
Episode: 3273/1000, score: 150, cumulative reward: -201.2958740836662
Episode: 3274/1000, score: 89, cumulative reward: -269.53968916640935
Episode: 3275/1000, score: 80, cumulative reward: -696.7333107482108
Episode: 3276/1000, score: 137, cumulative reward: -248.5350846856823
Episode: 3277/1000, score: 158, cumulative reward: -500.2773875353157
Episode: 3278/1000, score: 82, cumulative reward: -234.84891510117328
Episode: 3279/1000, score: 155, cumulative reward: -228.70156465361256
Episode: 3280/1000, score: 85, cumulative reward: -178.0951695516403
Episode: 3281/1000, score: 90, cumulative reward: -378.6319923825945
Episode: 3282/1000, score: 154, cumulative reward: -1075.149761060829
Episode: 3283/1000, score: 125, cumulative reward: -200.13732394391963
Episode: 3284/1000, score: 88, cumulative reward: -243.84843479290657
Episode: 3285/1000, score: 78, cumulative reward: -275.1129024444267
Episode: 3286/1000, score: 77, cumulative reward: -366.03115837801226
Episode: 3287/1000, score: 114, cumulative reward: -59.540981496305854
Episode: 3288/1000, score: 93, cumulative reward: -376.5603096110482
Episode: 3289/1000, score: 63, cumulative reward: -18.618918300614965
Episode: 3290/1000, score: 93, cumulative reward: -25.61612770900905
Episode: 3291/1000, score: 91, cumulative reward: -478.60916623153196
Episode: 3292/1000, score: 68, cumulative reward: -312.77277363800334
Episode: 3293/1000, score: 93, cumulative reward: -224.34160058270945
Episode: 3294/1000, score: 78, cumulative reward: -373.08605772776855
Episode: 3295/1000, score: 67, cumulative reward: -138.33316149844387
Episode: 3296/1000, score: 89, cumulative reward: -464.10994399084007
Episode: 3297/1000, score: 98, cumulative reward: -200.9312595345897
Episode: 3298/1000, score: 73, cumulative reward: -581.5606141458139
Episode: 3299/1000, score: 81, cumulative reward: -398.0071523038618
Episode: 3300/1000, score: 72, cumulative reward: -137.74487634089783
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3301/1000, score: 131, cumulative reward: -230.97806246986752
Episode: 3302/1000, score: 124, cumulative reward: -469.4232001474014
Episode: 3303/1000, score: 108, cumulative reward: -350.89457556016134
Episode: 3304/1000, score: 99, cumulative reward: -461.5286580165973
Episode: 3305/1000, score: 130, cumulative reward: -819.7266466390181
Episode: 3306/1000, score: 100, cumulative reward: -348.3591510774538
Episode: 3307/1000, score: 131, cumulative reward: -217.94798608185636
Episode: 3308/1000, score: 77, cumulative reward: -430.3535759367509
Episode: 3309/1000, score: 78, cumulative reward: -382.7421607270269
Episode: 3310/1000, score: 119, cumulative reward: -489.3349011244842
Episode: 3311/1000, score: 89, cumulative reward: -599.115238937097
Episode: 3312/1000, score: 79, cumulative reward: -127.73026566891137
Episode: 3313/1000, score: 136, cumulative reward: -324.73676228254203
Episode: 3314/1000, score: 60, cumulative reward: -119.77968177092359
Episode: 3315/1000, score: 107, cumulative reward: -150.83513091452124
Episode: 3316/1000, score: 99, cumulative reward: -313.9600342176316
Episode: 3317/1000, score: 77, cumulative reward: -161.58793009805942
Episode: 3318/1000, score: 75, cumulative reward: -135.5430410191281
Episode: 3319/1000, score: 73, cumulative reward: -449.305961750285
Episode: 3320/1000, score: 52, cumulative reward: -335.4255490210097
Episode: 3321/1000, score: 92, cumulative reward: -329.94661582027356
Episode: 3322/1000, score: 114, cumulative reward: -26.315201428015612
Episode: 3323/1000, score: 89, cumulative reward: -46.001067873334925
Episode: 3324/1000, score: 85, cumulative reward: -68.9094277873243
Episode: 3325/1000, score: 73, cumulative reward: -56.18139253082998
Episode: 3326/1000, score: 75, cumulative reward: -254.82352016792123
Episode: 3327/1000, score: 68, cumulative reward: -136.8474049098116
Episode: 3328/1000, score: 57, cumulative reward: -142.31494911020522
Episode: 3329/1000, score: 121, cumulative reward: -358.8432025864684
Episode: 3330/1000, score: 76, cumulative reward: -147.25514343164457
Episode: 3331/1000, score: 60, cumulative reward: -215.55034118218714
Episode: 3332/1000, score: 93, cumulative reward: -245.3844131898176
Episode: 3333/1000, score: 56, cumulative reward: -107.9898166351356
Episode: 3334/1000, score: 87, cumulative reward: -385.607297980589
Episode: 3335/1000, score: 134, cumulative reward: -489.9285151276967
Episode: 3336/1000, score: 79, cumulative reward: -527.6849894341602
Episode: 3337/1000, score: 58, cumulative reward: -295.1231038878539
Episode: 3338/1000, score: 63, cumulative reward: -167.71233481944603
Episode: 3339/1000, score: 88, cumulative reward: -315.21911186908585
Episode: 3340/1000, score: 79, cumulative reward: -316.71960915076534
Episode: 3341/1000, score: 94, cumulative reward: -100.13489349456344
Episode: 3342/1000, score: 72, cumulative reward: -168.3438633444466
Episode: 3343/1000, score: 78, cumulative reward: -192.750787614765
Episode: 3344/1000, score: 65, cumulative reward: -256.3635715554624
Episode: 3345/1000, score: 64, cumulative reward: -337.47316586004814
Episode: 3346/1000, score: 82, cumulative reward: -219.35495186311795
Episode: 3347/1000, score: 127, cumulative reward: -320.11483602576084
Episode: 3348/1000, score: 84, cumulative reward: -316.4169823955569
Episode: 3349/1000, score: 52, cumulative reward: -137.79457017320559
Episode: 3350/1000, score: 128, cumulative reward: -105.18392321385313
Episode: 3351/1000, score: 95, cumulative reward: -89.52688759064742
Episode: 3352/1000, score: 80, cumulative reward: -273.6113111219238
Episode: 3353/1000, score: 87, cumulative reward: -95.02970951306972
Episode: 3354/1000, score: 92, cumulative reward: -146.25431842225115
Episode: 3355/1000, score: 92, cumulative reward: -29.441114370844787
Episode: 3356/1000, score: 82, cumulative reward: -209.2691647437834
Episode: 3357/1000, score: 71, cumulative reward: -401.84130949893085
Episode: 3358/1000, score: 97, cumulative reward: -737.8411833632474
Episode: 3359/1000, score: 80, cumulative reward: -417.27924931706275
Episode: 3360/1000, score: 86, cumulative reward: -98.92813035311647
Episode: 3361/1000, score: 71, cumulative reward: -214.31555444928028
Episode: 3362/1000, score: 101, cumulative reward: -444.6302891185115
Episode: 3363/1000, score: 94, cumulative reward: -71.65648729971413
Episode: 3364/1000, score: 72, cumulative reward: -386.65169243052577
Episode: 3365/1000, score: 83, cumulative reward: 14.873033094422212
Episode: 3366/1000, score: 89, cumulative reward: -162.63288287991412
Episode: 3367/1000, score: 78, cumulative reward: -161.55375011439153
Episode: 3368/1000, score: 92, cumulative reward: -471.98257532263335
Episode: 3369/1000, score: 133, cumulative reward: -120.5278597126352
Episode: 3370/1000, score: 84, cumulative reward: -383.5073925788913
Episode: 3371/1000, score: 83, cumulative reward: -15.437543510628231
Episode: 3372/1000, score: 63, cumulative reward: -531.7145086195678
Episode: 3373/1000, score: 77, cumulative reward: -109.44366950471223
Episode: 3374/1000, score: 100, cumulative reward: -66.09273071760663
Episode: 3375/1000, score: 84, cumulative reward: -107.73069288540454
Episode: 3376/1000, score: 78, cumulative reward: -318.9948364012998
Episode: 3377/1000, score: 75, cumulative reward: -415.751990109423
Episode: 3378/1000, score: 98, cumulative reward: -548.2161777280022
Episode: 3379/1000, score: 70, cumulative reward: -156.85609876107424
Episode: 3380/1000, score: 83, cumulative reward: -301.3142431792593
Episode: 3381/1000, score: 61, cumulative reward: -81.85155427372636
Episode: 3382/1000, score: 147, cumulative reward: -61.958374043677615
Episode: 3383/1000, score: 97, cumulative reward: -328.6370561215688
Episode: 3384/1000, score: 49, cumulative reward: -138.22629441919412
Episode: 3385/1000, score: 89, cumulative reward: -292.12555254389724
Episode: 3386/1000, score: 84, cumulative reward: -301.9452609756175
Episode: 3387/1000, score: 76, cumulative reward: -303.52505506236093
Episode: 3388/1000, score: 85, cumulative reward: -161.65906954941693
Episode: 3389/1000, score: 84, cumulative reward: -209.54005676017
Episode: 3390/1000, score: 96, cumulative reward: -113.59180013493112
Episode: 3391/1000, score: 80, cumulative reward: -505.2124687604835
Episode: 3392/1000, score: 135, cumulative reward: -103.68209481687597
Episode: 3393/1000, score: 86, cumulative reward: -479.89253642042513
Episode: 3394/1000, score: 100, cumulative reward: -353.0075195669058
Episode: 3395/1000, score: 109, cumulative reward: -542.0003574218251
Episode: 3396/1000, score: 101, cumulative reward: -211.97309206237233
Episode: 3397/1000, score: 71, cumulative reward: -604.2591528218297
Episode: 3398/1000, score: 72, cumulative reward: -229.40867769629534
Episode: 3399/1000, score: 81, cumulative reward: -129.09298765964266
Episode: 3400/1000, score: 89, cumulative reward: -334.392997387889
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3401/1000, score: 114, cumulative reward: -187.12260052802077
Episode: 3402/1000, score: 84, cumulative reward: -367.4444731024571
Episode: 3403/1000, score: 84, cumulative reward: -488.7624768926567
Episode: 3404/1000, score: 88, cumulative reward: -113.517493097007
Episode: 3405/1000, score: 121, cumulative reward: -165.95392937883173
Episode: 3406/1000, score: 156, cumulative reward: 6.477107799198905
Episode: 3407/1000, score: 76, cumulative reward: -151.48332205788367
Episode: 3408/1000, score: 61, cumulative reward: -324.10987311959
Episode: 3409/1000, score: 62, cumulative reward: -79.15839352363153
Episode: 3410/1000, score: 126, cumulative reward: -139.27321810058407
Episode: 3411/1000, score: 70, cumulative reward: -243.0288132521347
Episode: 3412/1000, score: 58, cumulative reward: -103.31604326611553
Episode: 3413/1000, score: 54, cumulative reward: -204.69454020971165
Episode: 3414/1000, score: 67, cumulative reward: -75.01285023544054
Episode: 3415/1000, score: 90, cumulative reward: -185.97649117936794
Episode: 3416/1000, score: 91, cumulative reward: -84.55053923735444
Episode: 3417/1000, score: 80, cumulative reward: -243.9428098485584
Episode: 3418/1000, score: 100, cumulative reward: -120.76706375332692
Episode: 3419/1000, score: 117, cumulative reward: -95.13152250031727
Episode: 3420/1000, score: 58, cumulative reward: -116.66644109853908
Episode: 3421/1000, score: 76, cumulative reward: -94.12757507218242
Episode: 3422/1000, score: 87, cumulative reward: -189.44962359138844
Episode: 3423/1000, score: 113, cumulative reward: -71.1946243551503
Episode: 3424/1000, score: 76, cumulative reward: -50.402199835781545
Episode: 3425/1000, score: 121, cumulative reward: -85.67378261355356
Episode: 3426/1000, score: 67, cumulative reward: -400.41454428466244
Episode: 3427/1000, score: 54, cumulative reward: -293.50625892564784
Episode: 3428/1000, score: 78, cumulative reward: -323.3214682146697
Episode: 3429/1000, score: 75, cumulative reward: -201.7270916215217
Episode: 3430/1000, score: 107, cumulative reward: -41.8957767778455
Episode: 3431/1000, score: 90, cumulative reward: -234.53384336221194
Episode: 3432/1000, score: 87, cumulative reward: -48.11862123422797
Episode: 3433/1000, score: 83, cumulative reward: -314.9576641408577
Episode: 3434/1000, score: 96, cumulative reward: -205.3045115547002
Episode: 3435/1000, score: 63, cumulative reward: -198.52862655400384
Episode: 3436/1000, score: 86, cumulative reward: -169.56641313543054
Episode: 3437/1000, score: 77, cumulative reward: -144.63180840517242
Episode: 3438/1000, score: 76, cumulative reward: -261.9088025166264
Episode: 3439/1000, score: 120, cumulative reward: -363.72408139925295
Episode: 3440/1000, score: 151, cumulative reward: -63.49574774994623
Episode: 3441/1000, score: 151, cumulative reward: -108.45650076813129
Episode: 3442/1000, score: 118, cumulative reward: -55.991531184197534
Episode: 3443/1000, score: 114, cumulative reward: -204.61211465222203
Episode: 3444/1000, score: 123, cumulative reward: -102.4715652993539
Episode: 3445/1000, score: 120, cumulative reward: -434.21278431175267
Episode: 3446/1000, score: 128, cumulative reward: -54.114604698267485
Episode: 3447/1000, score: 121, cumulative reward: -102.65053152850379
Episode: 3448/1000, score: 116, cumulative reward: -152.12434852891542
Episode: 3449/1000, score: 142, cumulative reward: -318.0215569185167
Episode: 3450/1000, score: 96, cumulative reward: -301.89259925569104
Episode: 3451/1000, score: 74, cumulative reward: -372.93595397819405
Episode: 3452/1000, score: 591, cumulative reward: 175.36638059708326
Episode: 3453/1000, score: 51, cumulative reward: -438.49517116752054
Episode: 3454/1000, score: 80, cumulative reward: -5.214092462034742
Episode: 3455/1000, score: 148, cumulative reward: -78.30922490271543
Episode: 3456/1000, score: 91, cumulative reward: -207.78001305771912
Episode: 3457/1000, score: 137, cumulative reward: -102.8024751513985
Episode: 3458/1000, score: 93, cumulative reward: -408.22868788855544
Episode: 3459/1000, score: 167, cumulative reward: -213.65333309091614
Episode: 3460/1000, score: 103, cumulative reward: -288.9000488849558
Episode: 3461/1000, score: 81, cumulative reward: -103.05483111877314
Episode: 3462/1000, score: 55, cumulative reward: -166.38203761296873
Episode: 3463/1000, score: 179, cumulative reward: -318.6935184334203
Episode: 3464/1000, score: 76, cumulative reward: -359.58134755405325
Episode: 3465/1000, score: 64, cumulative reward: -267.30960220002044
Episode: 3466/1000, score: 69, cumulative reward: -438.766426962726
Episode: 3467/1000, score: 80, cumulative reward: -166.36729615261515
Episode: 3468/1000, score: 81, cumulative reward: -456.26019236795315
Episode: 3469/1000, score: 102, cumulative reward: -74.98769880301225
Episode: 3470/1000, score: 93, cumulative reward: -307.95026126503456
Episode: 3471/1000, score: 57, cumulative reward: -426.31459011169665
Episode: 3472/1000, score: 90, cumulative reward: -275.57695043944886
Episode: 3473/1000, score: 89, cumulative reward: -247.35981495303977
Episode: 3474/1000, score: 57, cumulative reward: -290.4311041432046
Episode: 3475/1000, score: 130, cumulative reward: -107.96114997864846
Episode: 3476/1000, score: 78, cumulative reward: -410.62005712962156
Episode: 3477/1000, score: 122, cumulative reward: -51.48556756440064
Episode: 3478/1000, score: 94, cumulative reward: -410.61306013768314
Episode: 3479/1000, score: 87, cumulative reward: -87.64937363871127
Episode: 3480/1000, score: 154, cumulative reward: -325.1282606675976
Episode: 3481/1000, score: 119, cumulative reward: -338.96855116125573
Episode: 3482/1000, score: 55, cumulative reward: -293.1033659183204
Episode: 3483/1000, score: 177, cumulative reward: -358.5670708325075
Episode: 3484/1000, score: 74, cumulative reward: -463.61284600173707
Episode: 3485/1000, score: 68, cumulative reward: -256.8362174598776
Episode: 3486/1000, score: 106, cumulative reward: -201.02488575880847
Episode: 3487/1000, score: 106, cumulative reward: -541.9221170758782
Episode: 3488/1000, score: 60, cumulative reward: -421.42781417788285
Episode: 3489/1000, score: 78, cumulative reward: -271.1789776487793
Episode: 3490/1000, score: 110, cumulative reward: -117.04362264896231
Episode: 3491/1000, score: 59, cumulative reward: -342.11204651718026
Episode: 3492/1000, score: 66, cumulative reward: -277.52083099930775
Episode: 3493/1000, score: 111, cumulative reward: -433.9514317833798
Episode: 3494/1000, score: 120, cumulative reward: -292.7044192757075
Episode: 3495/1000, score: 57, cumulative reward: -487.5503842771315
Episode: 3496/1000, score: 103, cumulative reward: -623.5038087559626
Episode: 3497/1000, score: 63, cumulative reward: -455.62135265665705
Episode: 3498/1000, score: 96, cumulative reward: -588.1363219006549
Episode: 3499/1000, score: 85, cumulative reward: -467.44507452257494
Episode: 3500/1000, score: 114, cumulative reward: -229.95256343176413
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3501/1000, score: 110, cumulative reward: -342.0164966048547
Episode: 3502/1000, score: 94, cumulative reward: -92.93756281144994
Episode: 3503/1000, score: 70, cumulative reward: -283.6604260376134
Episode: 3504/1000, score: 79, cumulative reward: -458.86317228774203
Episode: 3505/1000, score: 81, cumulative reward: -305.66825787801906
Episode: 3506/1000, score: 67, cumulative reward: -438.19858799158794
Episode: 3507/1000, score: 74, cumulative reward: -266.496115682716
Episode: 3508/1000, score: 58, cumulative reward: -100.0761932595992
Episode: 3509/1000, score: 137, cumulative reward: -297.8285211573693
Episode: 3510/1000, score: 68, cumulative reward: -390.3249029596624
Episode: 3511/1000, score: 156, cumulative reward: -15.879939131512671
Episode: 3512/1000, score: 61, cumulative reward: -623.3930381700012
Episode: 3513/1000, score: 65, cumulative reward: -522.1748290765131
Episode: 3514/1000, score: 62, cumulative reward: -425.88912101178005
Episode: 3515/1000, score: 108, cumulative reward: -268.74987867213474
Episode: 3516/1000, score: 83, cumulative reward: -236.03765168035383
Episode: 3517/1000, score: 87, cumulative reward: -358.892232750481
Episode: 3518/1000, score: 86, cumulative reward: -518.3010570488467
Episode: 3519/1000, score: 53, cumulative reward: -366.51769868642
Episode: 3520/1000, score: 57, cumulative reward: -381.4252659150524
Episode: 3521/1000, score: 115, cumulative reward: -316.43530141757014
Episode: 3522/1000, score: 79, cumulative reward: -196.3814960576858
Episode: 3523/1000, score: 83, cumulative reward: -472.2339472496409
Episode: 3524/1000, score: 72, cumulative reward: -381.6285068662839
Episode: 3525/1000, score: 97, cumulative reward: -484.35961168250833
Episode: 3526/1000, score: 118, cumulative reward: -425.4138983074866
Episode: 3527/1000, score: 93, cumulative reward: -545.7607719707422
Episode: 3528/1000, score: 96, cumulative reward: -260.9324726467569
Episode: 3529/1000, score: 68, cumulative reward: -362.5472122265183
Episode: 3530/1000, score: 64, cumulative reward: -159.4283932052696
Episode: 3531/1000, score: 117, cumulative reward: -436.8077575766335
Episode: 3532/1000, score: 67, cumulative reward: -80.80996393484061
Episode: 3533/1000, score: 88, cumulative reward: -159.67476870355597
Episode: 3534/1000, score: 86, cumulative reward: -591.7441764327482
Episode: 3535/1000, score: 93, cumulative reward: -238.04006387381392
Episode: 3536/1000, score: 84, cumulative reward: -273.7027202859958
Episode: 3537/1000, score: 90, cumulative reward: -292.6893706615544
Episode: 3538/1000, score: 59, cumulative reward: -205.0147843569685
Episode: 3539/1000, score: 78, cumulative reward: -144.63715750443583
Episode: 3540/1000, score: 77, cumulative reward: -498.7653798017161
Episode: 3541/1000, score: 58, cumulative reward: -95.80771747938643
Episode: 3542/1000, score: 70, cumulative reward: -40.62314955226428
Episode: 3543/1000, score: 96, cumulative reward: -149.8105204003246
Episode: 3544/1000, score: 78, cumulative reward: -555.927449938677
Episode: 3545/1000, score: 52, cumulative reward: -348.9707756320688
Episode: 3546/1000, score: 87, cumulative reward: -114.2487201398958
Episode: 3547/1000, score: 62, cumulative reward: -260.8704133229748
Episode: 3548/1000, score: 66, cumulative reward: -78.52562617490416
Episode: 3549/1000, score: 81, cumulative reward: -161.36158852341077
Episode: 3550/1000, score: 80, cumulative reward: -805.841129809578
Episode: 3551/1000, score: 97, cumulative reward: -77.70983643706093
Episode: 3552/1000, score: 104, cumulative reward: -424.30133078865623
Episode: 3553/1000, score: 64, cumulative reward: -196.50789606421546
Episode: 3554/1000, score: 79, cumulative reward: -548.9156820645285
Episode: 3555/1000, score: 53, cumulative reward: -168.0973363720093
Episode: 3556/1000, score: 97, cumulative reward: -296.688987433496
Episode: 3557/1000, score: 63, cumulative reward: -121.74751528942757
Episode: 3558/1000, score: 107, cumulative reward: -153.58137987696261
Episode: 3559/1000, score: 80, cumulative reward: -487.34817389863935
Episode: 3560/1000, score: 74, cumulative reward: -377.6663533833208
Episode: 3561/1000, score: 74, cumulative reward: -150.3705901537567
Episode: 3562/1000, score: 90, cumulative reward: -320.4966581101735
Episode: 3563/1000, score: 91, cumulative reward: -249.53685933625965
Episode: 3564/1000, score: 54, cumulative reward: -225.59907547894335
Episode: 3565/1000, score: 93, cumulative reward: -163.6906033100987
Episode: 3566/1000, score: 110, cumulative reward: -204.26777166914744
Episode: 3567/1000, score: 120, cumulative reward: -456.50865070732226
Episode: 3568/1000, score: 108, cumulative reward: -334.00563656918166
Episode: 3569/1000, score: 84, cumulative reward: -544.6377700209886
Episode: 3570/1000, score: 95, cumulative reward: -474.7751714104036
Episode: 3571/1000, score: 78, cumulative reward: -712.7190492873781
Episode: 3572/1000, score: 85, cumulative reward: -446.6052365282697
Episode: 3573/1000, score: 80, cumulative reward: -241.98142417611064
Episode: 3574/1000, score: 104, cumulative reward: -110.49015387864152
Episode: 3575/1000, score: 67, cumulative reward: -364.11742845099735
Episode: 3576/1000, score: 65, cumulative reward: -225.39392690634634
Episode: 3577/1000, score: 118, cumulative reward: -334.76127038656915
Episode: 3578/1000, score: 76, cumulative reward: -822.992515692343
Episode: 3579/1000, score: 53, cumulative reward: -266.04216367678123
Episode: 3580/1000, score: 94, cumulative reward: -668.2787585801493
Episode: 3581/1000, score: 50, cumulative reward: -297.96484588790446
Episode: 3582/1000, score: 95, cumulative reward: -54.48302634787099
Episode: 3583/1000, score: 84, cumulative reward: -291.27593897789876
Episode: 3584/1000, score: 86, cumulative reward: -241.04083628994024
Episode: 3585/1000, score: 80, cumulative reward: -265.11536246185517
Episode: 3586/1000, score: 76, cumulative reward: -151.23940965519625
Episode: 3587/1000, score: 66, cumulative reward: -411.9994456918454
Episode: 3588/1000, score: 130, cumulative reward: -256.498012091408
Episode: 3589/1000, score: 60, cumulative reward: -41.71251893533969
Episode: 3590/1000, score: 80, cumulative reward: -567.3277561625287
Episode: 3591/1000, score: 63, cumulative reward: -334.035580534723
Episode: 3592/1000, score: 114, cumulative reward: -584.9663608927156
Episode: 3593/1000, score: 82, cumulative reward: -72.29699583195335
Episode: 3594/1000, score: 70, cumulative reward: -214.08967796392352
Episode: 3595/1000, score: 107, cumulative reward: -666.6322935341875
Episode: 3596/1000, score: 103, cumulative reward: -61.76282112040643
Episode: 3597/1000, score: 73, cumulative reward: -215.16869565183052
Episode: 3598/1000, score: 77, cumulative reward: -386.0707969195334
Episode: 3599/1000, score: 108, cumulative reward: -367.2197181852815
Episode: 3600/1000, score: 76, cumulative reward: -313.11464123591514
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3601/1000, score: 72, cumulative reward: -355.48866382600505
Episode: 3602/1000, score: 72, cumulative reward: -109.62848736722012
Episode: 3603/1000, score: 79, cumulative reward: -257.08080117169015
Episode: 3604/1000, score: 69, cumulative reward: -249.6998277544786
Episode: 3605/1000, score: 93, cumulative reward: -134.80242877722358
Episode: 3606/1000, score: 81, cumulative reward: -13.509203430574772
Episode: 3607/1000, score: 58, cumulative reward: -198.19580690187252
Episode: 3608/1000, score: 71, cumulative reward: -20.673407879099884
Episode: 3609/1000, score: 83, cumulative reward: -49.19955779002753
Episode: 3610/1000, score: 53, cumulative reward: -136.9106175153649
Episode: 3611/1000, score: 73, cumulative reward: -216.99058872449132
Episode: 3612/1000, score: 58, cumulative reward: -82.09345436927224
Episode: 3613/1000, score: 73, cumulative reward: -461.8037704894195
Episode: 3614/1000, score: 98, cumulative reward: 15.297508602217349
Episode: 3615/1000, score: 51, cumulative reward: -130.48070355348352
Episode: 3616/1000, score: 75, cumulative reward: -115.63940340227589
Episode: 3617/1000, score: 89, cumulative reward: -285.37497607531657
Episode: 3618/1000, score: 65, cumulative reward: -163.67479855788577
Episode: 3619/1000, score: 60, cumulative reward: -204.21491724861372
Episode: 3620/1000, score: 60, cumulative reward: -91.99877973807148
Episode: 3621/1000, score: 90, cumulative reward: -222.60957045006205
Episode: 3622/1000, score: 53, cumulative reward: -184.53385538731794
Episode: 3623/1000, score: 84, cumulative reward: -189.7916048402196
Episode: 3624/1000, score: 112, cumulative reward: -128.04327376372356
Episode: 3625/1000, score: 81, cumulative reward: -90.42787672708376
Episode: 3626/1000, score: 74, cumulative reward: -202.1348297257449
Episode: 3627/1000, score: 81, cumulative reward: -352.86422252507793
Episode: 3628/1000, score: 74, cumulative reward: -660.1909765700834
Episode: 3629/1000, score: 101, cumulative reward: -60.191154059163665
Episode: 3630/1000, score: 74, cumulative reward: -522.3784054947686
Episode: 3631/1000, score: 96, cumulative reward: -127.66773476886773
Episode: 3632/1000, score: 58, cumulative reward: -123.71209723014115
Episode: 3633/1000, score: 123, cumulative reward: -471.1511583617533
Episode: 3634/1000, score: 123, cumulative reward: -500.6412186105581
Episode: 3635/1000, score: 85, cumulative reward: -101.39152447779799
Episode: 3636/1000, score: 70, cumulative reward: -311.25789225481395
Episode: 3637/1000, score: 130, cumulative reward: -14.203715458470711
Episode: 3638/1000, score: 89, cumulative reward: -78.58564666654729
Episode: 3639/1000, score: 77, cumulative reward: -62.76685883325275
Episode: 3640/1000, score: 91, cumulative reward: -6.595066819540733
Episode: 3641/1000, score: 96, cumulative reward: -173.39266021952253
Episode: 3642/1000, score: 80, cumulative reward: -203.02960425290416
Episode: 3643/1000, score: 114, cumulative reward: -60.60228505656114
Episode: 3644/1000, score: 87, cumulative reward: -272.39488255032154
Episode: 3645/1000, score: 51, cumulative reward: -90.33346204531858
Episode: 3646/1000, score: 119, cumulative reward: -163.22068127025122
Episode: 3647/1000, score: 105, cumulative reward: -185.1671170515534
Episode: 3648/1000, score: 128, cumulative reward: -75.48363173316308
Episode: 3649/1000, score: 156, cumulative reward: -246.5043647143601
Episode: 3650/1000, score: 79, cumulative reward: -345.96608327873497
Episode: 3651/1000, score: 122, cumulative reward: -53.67263845119233
Episode: 3652/1000, score: 134, cumulative reward: 20.26941254394265
Episode: 3653/1000, score: 59, cumulative reward: -369.78693826637567
Episode: 3654/1000, score: 52, cumulative reward: -140.3745885780425
Episode: 3655/1000, score: 113, cumulative reward: -89.50088596384646
Episode: 3656/1000, score: 115, cumulative reward: -143.0101315280716
Episode: 3657/1000, score: 113, cumulative reward: -61.44694747967406
Episode: 3658/1000, score: 93, cumulative reward: -249.49689550707126
Episode: 3659/1000, score: 86, cumulative reward: -57.86457353054838
Episode: 3660/1000, score: 109, cumulative reward: -419.4093487875266
Episode: 3661/1000, score: 88, cumulative reward: -226.91371443781506
Episode: 3662/1000, score: 121, cumulative reward: -182.1255315742442
Episode: 3663/1000, score: 85, cumulative reward: -131.86029637652777
Episode: 3664/1000, score: 68, cumulative reward: -238.96886977862414
Episode: 3665/1000, score: 81, cumulative reward: -332.7797585222313
Episode: 3666/1000, score: 94, cumulative reward: -133.1143671020866
Episode: 3667/1000, score: 97, cumulative reward: -350.639789704724
Episode: 3668/1000, score: 77, cumulative reward: -45.26372705763122
Episode: 3669/1000, score: 231, cumulative reward: -281.93484821182096
Episode: 3670/1000, score: 87, cumulative reward: -169.6196258669247
Episode: 3671/1000, score: 92, cumulative reward: -94.89365740478958
Episode: 3672/1000, score: 173, cumulative reward: 19.72060929765331
Episode: 3673/1000, score: 98, cumulative reward: -91.11096394232486
Episode: 3674/1000, score: 99, cumulative reward: -104.72564199807493
Episode: 3675/1000, score: 76, cumulative reward: -171.54489445357308
Episode: 3676/1000, score: 87, cumulative reward: -125.73813112775667
Episode: 3677/1000, score: 154, cumulative reward: -254.5886656602241
Episode: 3678/1000, score: 67, cumulative reward: -482.8664093931504
Episode: 3679/1000, score: 90, cumulative reward: -318.63755687983894
Episode: 3680/1000, score: 215, cumulative reward: -300.2795983192134
Episode: 3681/1000, score: 118, cumulative reward: -269.4729264661257
Episode: 3682/1000, score: 330, cumulative reward: -303.79204681505666
Episode: 3683/1000, score: 107, cumulative reward: -334.04587227528356
Episode: 3684/1000, score: 168, cumulative reward: -270.8497906682236
Episode: 3685/1000, score: 96, cumulative reward: -555.0902400030136
Episode: 3686/1000, score: 213, cumulative reward: -550.8488015187125
Episode: 3687/1000, score: 102, cumulative reward: -768.1779761647514
Episode: 3688/1000, score: 100, cumulative reward: -192.47600373258064
Episode: 3689/1000, score: 76, cumulative reward: -321.77282594270065
Episode: 3690/1000, score: 134, cumulative reward: -347.37379297969426
Episode: 3691/1000, score: 77, cumulative reward: -435.3955450583764
Episode: 3692/1000, score: 99, cumulative reward: -179.20114867839658
Episode: 3693/1000, score: 190, cumulative reward: -315.79259267468683
Episode: 3694/1000, score: 205, cumulative reward: -376.38906228186687
Episode: 3695/1000, score: 408, cumulative reward: -357.22162939808175
Episode: 3696/1000, score: 64, cumulative reward: -265.29068747344843
Episode: 3697/1000, score: 173, cumulative reward: -237.2257843008934
Episode: 3698/1000, score: 263, cumulative reward: -361.27003796337203
Episode: 3699/1000, score: 80, cumulative reward: -246.30623293439007
Episode: 3700/1000, score: 203, cumulative reward: -225.66816298653464
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3701/1000, score: 224, cumulative reward: -307.15597214287743
Episode: 3702/1000, score: 151, cumulative reward: -332.27822586242064
Episode: 3703/1000, score: 159, cumulative reward: -302.07733149610726
Episode: 3704/1000, score: 147, cumulative reward: -238.49817234590085
Episode: 3705/1000, score: 186, cumulative reward: -205.8656571935839
Episode: 3706/1000, score: 552, cumulative reward: -567.7025394055903
Episode: 3707/1000, score: 524, cumulative reward: -375.0099414966166
Episode: 3708/1000, score: 312, cumulative reward: -330.29323604404374
Episode: 3709/1000, score: 392, cumulative reward: -453.600417175675
Episode: 3710/1000, score: 221, cumulative reward: -292.2248688154245
Episode: 3711/1000, score: 668, cumulative reward: -405.14120680140496
Episode: 3712/1000, score: 604, cumulative reward: -455.75440489676197
Episode: 3713/1000, score: 157, cumulative reward: -374.8957553941668
Episode: 3714/1000, score: 320, cumulative reward: -434.91137562852947
Episode: 3715/1000, score: 586, cumulative reward: -432.53943289075755
Episode: 3716/1000, score: 371, cumulative reward: -396.5848856272715
Episode: 3717/1000, score: 445, cumulative reward: -367.5269318987777
Episode: 3718/1000, score: 388, cumulative reward: -32.32824826432015
Episode: 3719/1000, score: 528, cumulative reward: -197.4565131282037
Episode: 3720/1000, score: 212, cumulative reward: -525.9852337957337
Episode: 3721/1000, score: 245, cumulative reward: -214.05444539243604
Episode: 3722/1000, score: 103, cumulative reward: -400.25936614556514
Episode: 3723/1000, score: 163, cumulative reward: -317.9807227275193
Episode: 3724/1000, score: 94, cumulative reward: -129.84094372428495
Episode: 3725/1000, score: 134, cumulative reward: -106.38340186560669
Episode: 3726/1000, score: 56, cumulative reward: -176.76660514754013
Episode: 3727/1000, score: 114, cumulative reward: -155.47746395239272
Episode: 3728/1000, score: 95, cumulative reward: 18.203188901459455
Episode: 3729/1000, score: 170, cumulative reward: -470.47797882309226
Episode: 3730/1000, score: 173, cumulative reward: -127.42017290232309
Episode: 3731/1000, score: 92, cumulative reward: -296.21067252248554
Episode: 3732/1000, score: 92, cumulative reward: -471.03206520175894
Episode: 3733/1000, score: 75, cumulative reward: -470.20122671509364
Episode: 3734/1000, score: 58, cumulative reward: -278.84164912932283
Episode: 3735/1000, score: 84, cumulative reward: -312.04821004841074
Episode: 3736/1000, score: 51, cumulative reward: -261.18848480137774
Episode: 3737/1000, score: 79, cumulative reward: -391.2744253498892
Episode: 3738/1000, score: 155, cumulative reward: -261.513650901753
Episode: 3739/1000, score: 94, cumulative reward: -204.12616750709103
Episode: 3740/1000, score: 95, cumulative reward: -326.4843244723401
Episode: 3741/1000, score: 125, cumulative reward: -187.4959755793338
Episode: 3742/1000, score: 88, cumulative reward: -142.74870017166722
Episode: 3743/1000, score: 95, cumulative reward: -331.7378074982445
Episode: 3744/1000, score: 88, cumulative reward: -246.07069810937
Episode: 3745/1000, score: 89, cumulative reward: -235.89322661348024
Episode: 3746/1000, score: 89, cumulative reward: -436.135396626756
Episode: 3747/1000, score: 131, cumulative reward: -262.205516580005
Episode: 3748/1000, score: 136, cumulative reward: -276.5050148859231
Episode: 3749/1000, score: 59, cumulative reward: -205.5761918171692
Episode: 3750/1000, score: 84, cumulative reward: -594.7126113953068
Episode: 3751/1000, score: 69, cumulative reward: -501.61925094066765
Episode: 3752/1000, score: 53, cumulative reward: -367.8332791909145
Episode: 3753/1000, score: 229, cumulative reward: -642.0013077945135
Episode: 3754/1000, score: 66, cumulative reward: -259.94147890846034
Episode: 3755/1000, score: 65, cumulative reward: -372.2935790227976
Episode: 3756/1000, score: 351, cumulative reward: -402.8083165789296
Episode: 3757/1000, score: 54, cumulative reward: -503.30676205469763
Episode: 3758/1000, score: 147, cumulative reward: -158.4764794370174
Episode: 3759/1000, score: 174, cumulative reward: -268.72325493410176
Episode: 3760/1000, score: 158, cumulative reward: -98.39769973775267
Episode: 3761/1000, score: 171, cumulative reward: -552.0161124114837
Episode: 3762/1000, score: 282, cumulative reward: -332.3392146995836
Episode: 3763/1000, score: 95, cumulative reward: -338.1515451425578
Episode: 3764/1000, score: 120, cumulative reward: -333.26758506085446
Episode: 3765/1000, score: 126, cumulative reward: -425.53290988481956
Episode: 3766/1000, score: 343, cumulative reward: -882.4716897162066
Episode: 3767/1000, score: 159, cumulative reward: -346.6473782627954
Episode: 3768/1000, score: 280, cumulative reward: -289.4051426284003
Episode: 3769/1000, score: 234, cumulative reward: -580.2841706358843
Episode: 3770/1000, score: 135, cumulative reward: -358.4973926445441
Episode: 3771/1000, score: 718, cumulative reward: -219.7223985187372
Episode: 3772/1000, score: 220, cumulative reward: -599.8226021617468
Episode: 3773/1000, score: 120, cumulative reward: -248.5950422617164
Episode: 3774/1000, score: 181, cumulative reward: -174.20135507406223
Episode: 3775/1000, score: 88, cumulative reward: -201.15924431774164
Episode: 3776/1000, score: 110, cumulative reward: -336.4765668035424
Episode: 3777/1000, score: 270, cumulative reward: -491.4116592827668
Episode: 3778/1000, score: 227, cumulative reward: -239.42146906328463
Episode: 3779/1000, score: 52, cumulative reward: -81.22585705120431
Episode: 3780/1000, score: 84, cumulative reward: -192.16274954902195
Episode: 3781/1000, score: 60, cumulative reward: -96.45831456170788
Episode: 3782/1000, score: 94, cumulative reward: -289.0389339868733
Episode: 3783/1000, score: 68, cumulative reward: -138.4795832565506
Episode: 3784/1000, score: 139, cumulative reward: -320.70699054606723
Episode: 3785/1000, score: 113, cumulative reward: -181.7584497192485
Episode: 3786/1000, score: 151, cumulative reward: -211.3396678277203
Episode: 3787/1000, score: 222, cumulative reward: -126.82199679949602
Episode: 3788/1000, score: 108, cumulative reward: -319.01236726217434
Episode: 3789/1000, score: 128, cumulative reward: -225.99170576943987
Episode: 3790/1000, score: 133, cumulative reward: -502.96378677558613
Episode: 3791/1000, score: 128, cumulative reward: -267.75474753021615
Episode: 3792/1000, score: 93, cumulative reward: -295.69402703100116
Episode: 3793/1000, score: 155, cumulative reward: -58.39579706803618
Episode: 3794/1000, score: 121, cumulative reward: -261.86441094588804
Episode: 3795/1000, score: 86, cumulative reward: -328.99395599514787
Episode: 3796/1000, score: 102, cumulative reward: -224.95427619583887
Episode: 3797/1000, score: 110, cumulative reward: -202.5678256610858
Episode: 3798/1000, score: 176, cumulative reward: -55.41963439438307
Episode: 3799/1000, score: 132, cumulative reward: -299.1310454625913
Episode: 3800/1000, score: 136, cumulative reward: -115.46139831012924
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3801/1000, score: 101, cumulative reward: -418.9471910549262
Episode: 3802/1000, score: 225, cumulative reward: -211.89132324250704
Episode: 3803/1000, score: 172, cumulative reward: -287.8590347929269
Episode: 3804/1000, score: 184, cumulative reward: -211.86419075913415
Episode: 3805/1000, score: 121, cumulative reward: -231.03965502548695
Episode: 3806/1000, score: 147, cumulative reward: -196.48744089522464
Episode: 3807/1000, score: 114, cumulative reward: -173.18483695247983
Episode: 3808/1000, score: 545, cumulative reward: -223.19465765322124
Episode: 3809/1000, score: 346, cumulative reward: -655.7941229680596
Episode: 3810/1000, score: 99, cumulative reward: -276.5859196909553
Episode: 3811/1000, score: 173, cumulative reward: -585.5107212104458
Episode: 3812/1000, score: 288, cumulative reward: -593.4278351743142
Episode: 3813/1000, score: 590, cumulative reward: -294.4704533324665
Episode: 3814/1000, score: 252, cumulative reward: -202.6110672125759
Episode: 3815/1000, score: 235, cumulative reward: -524.1456432037716
Episode: 3816/1000, score: 290, cumulative reward: -277.6508012759931
Episode: 3817/1000, score: 372, cumulative reward: -402.87227785613027
Episode: 3818/1000, score: 130, cumulative reward: -351.38728614075796
Episode: 3819/1000, score: 103, cumulative reward: -250.39961391382477
Episode: 3820/1000, score: 145, cumulative reward: -283.055218967826
Episode: 3821/1000, score: 200, cumulative reward: -312.65552204558423
Episode: 3822/1000, score: 199, cumulative reward: -131.11403692747265
Episode: 3823/1000, score: 194, cumulative reward: -358.4890818779556
Episode: 3824/1000, score: 94, cumulative reward: -500.675493631915
Episode: 3825/1000, score: 166, cumulative reward: -106.56015626961874
Episode: 3826/1000, score: 55, cumulative reward: -458.1493859113718
Episode: 3827/1000, score: 49, cumulative reward: -309.9651967462404
Episode: 3828/1000, score: 56, cumulative reward: -387.38569363790253
Episode: 3829/1000, score: 70, cumulative reward: -278.96528244191336
Episode: 3830/1000, score: 90, cumulative reward: -328.60713371993927
Episode: 3831/1000, score: 77, cumulative reward: -637.9445948011025
Episode: 3832/1000, score: 86, cumulative reward: -683.9808820036632
Episode: 3833/1000, score: 65, cumulative reward: -411.31872478763137
Episode: 3834/1000, score: 62, cumulative reward: -128.0248434757564
Episode: 3835/1000, score: 77, cumulative reward: -119.98461534305139
Episode: 3836/1000, score: 61, cumulative reward: -498.42897730403075
Episode: 3837/1000, score: 83, cumulative reward: -810.274805879448
Episode: 3838/1000, score: 54, cumulative reward: -225.6555201925071
Episode: 3839/1000, score: 56, cumulative reward: -511.47807438578917
Episode: 3840/1000, score: 64, cumulative reward: -449.6564775373178
Episode: 3841/1000, score: 51, cumulative reward: -233.23513704868412
Episode: 3842/1000, score: 63, cumulative reward: -382.96072656108095
Episode: 3843/1000, score: 59, cumulative reward: -424.37913122670716
Episode: 3844/1000, score: 59, cumulative reward: -380.20342965208835
Episode: 3845/1000, score: 63, cumulative reward: -412.97355377278024
Episode: 3846/1000, score: 59, cumulative reward: -474.10378478180803
Episode: 3847/1000, score: 54, cumulative reward: -290.38928194830964
Episode: 3848/1000, score: 64, cumulative reward: -549.7383190182579
Episode: 3849/1000, score: 72, cumulative reward: -563.9450209044896
Episode: 3850/1000, score: 77, cumulative reward: -509.8840461829001
Episode: 3851/1000, score: 56, cumulative reward: -153.73511985682086
Episode: 3852/1000, score: 51, cumulative reward: -113.18692243006632
Episode: 3853/1000, score: 94, cumulative reward: -393.7937319649621
Episode: 3854/1000, score: 62, cumulative reward: -423.8888151907614
Episode: 3855/1000, score: 50, cumulative reward: -204.3310205597066
Episode: 3856/1000, score: 57, cumulative reward: -291.69155953058413
Episode: 3857/1000, score: 65, cumulative reward: -154.68726751849013
Episode: 3858/1000, score: 59, cumulative reward: -113.22463369592217
Episode: 3859/1000, score: 74, cumulative reward: -434.8625689047864
Episode: 3860/1000, score: 57, cumulative reward: -105.99214510244923
Episode: 3861/1000, score: 100, cumulative reward: -304.9757787638554
Episode: 3862/1000, score: 73, cumulative reward: -179.97021591061528
Episode: 3863/1000, score: 74, cumulative reward: -672.3845444888053
Episode: 3864/1000, score: 99, cumulative reward: -313.73235891408626
Episode: 3865/1000, score: 55, cumulative reward: -122.81151896303265
Episode: 3866/1000, score: 73, cumulative reward: -397.1780121631829
Episode: 3867/1000, score: 67, cumulative reward: -204.95805791670298
Episode: 3868/1000, score: 60, cumulative reward: -440.31004999262655
Episode: 3869/1000, score: 66, cumulative reward: -478.8828093734031
Episode: 3870/1000, score: 65, cumulative reward: -178.32119684259644
Episode: 3871/1000, score: 56, cumulative reward: -9.482454321104242
Episode: 3872/1000, score: 63, cumulative reward: -181.59390315697192
Episode: 3873/1000, score: 61, cumulative reward: -231.86150995903887
Episode: 3874/1000, score: 74, cumulative reward: -240.62582272950502
Episode: 3875/1000, score: 65, cumulative reward: -147.0217508392012
Episode: 3876/1000, score: 52, cumulative reward: -215.46802805882044
Episode: 3877/1000, score: 89, cumulative reward: -142.35253639068864
Episode: 3878/1000, score: 65, cumulative reward: -144.65187418258296
Episode: 3879/1000, score: 55, cumulative reward: -195.97743079030414
Episode: 3880/1000, score: 61, cumulative reward: -137.8920904820499
Episode: 3881/1000, score: 59, cumulative reward: -220.769783804111
Episode: 3882/1000, score: 54, cumulative reward: -136.38329324830855
Episode: 3883/1000, score: 85, cumulative reward: -129.94973424501083
Episode: 3884/1000, score: 78, cumulative reward: -201.6475396379155
Episode: 3885/1000, score: 56, cumulative reward: -121.6324419160332
Episode: 3886/1000, score: 95, cumulative reward: -235.60253588107594
Episode: 3887/1000, score: 83, cumulative reward: -549.9186677137817
Episode: 3888/1000, score: 66, cumulative reward: -141.74881772052248
Episode: 3889/1000, score: 101, cumulative reward: -621.234291308943
Episode: 3890/1000, score: 82, cumulative reward: -120.05504237320883
Episode: 3891/1000, score: 76, cumulative reward: -140.50672989098106
Episode: 3892/1000, score: 102, cumulative reward: -346.4535735805135
Episode: 3893/1000, score: 65, cumulative reward: -178.33835618887557
Episode: 3894/1000, score: 61, cumulative reward: -220.99726787876943
Episode: 3895/1000, score: 79, cumulative reward: -342.99889337030913
Episode: 3896/1000, score: 55, cumulative reward: -199.6506673775242
Episode: 3897/1000, score: 80, cumulative reward: -240.0495322193493
Episode: 3898/1000, score: 63, cumulative reward: -244.1581329530712
Episode: 3899/1000, score: 176, cumulative reward: -572.8189133307314
Episode: 3900/1000, score: 66, cumulative reward: -213.12070542299054
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 3901/1000, score: 80, cumulative reward: -240.75554268414368
Episode: 3902/1000, score: 57, cumulative reward: -123.48229993068546
Episode: 3903/1000, score: 91, cumulative reward: -270.81554709252185
Episode: 3904/1000, score: 73, cumulative reward: -286.02625613177156
Episode: 3905/1000, score: 72, cumulative reward: -316.0705155806745
Episode: 3906/1000, score: 60, cumulative reward: -241.80320995904216
Episode: 3907/1000, score: 46, cumulative reward: -301.7692909020617
Episode: 3908/1000, score: 55, cumulative reward: -223.7850282363598
Episode: 3909/1000, score: 90, cumulative reward: -369.58696247996
Episode: 3910/1000, score: 52, cumulative reward: -371.9305791411617
Episode: 3911/1000, score: 63, cumulative reward: -407.8769083154038
Episode: 3912/1000, score: 66, cumulative reward: -608.6588210354552
Episode: 3913/1000, score: 77, cumulative reward: -529.2173995924534
Episode: 3914/1000, score: 54, cumulative reward: -485.18717279287006
Episode: 3915/1000, score: 53, cumulative reward: -383.74043446654855
Episode: 3916/1000, score: 60, cumulative reward: -441.7310309736876
Episode: 3917/1000, score: 67, cumulative reward: -524.1506903159291
Episode: 3918/1000, score: 49, cumulative reward: -439.62036712797476
Episode: 3919/1000, score: 57, cumulative reward: -474.3266716871885
Episode: 3920/1000, score: 83, cumulative reward: -460.60331610570665
Episode: 3921/1000, score: 85, cumulative reward: -649.2643065876447
Episode: 3922/1000, score: 64, cumulative reward: -307.3735621201665
Episode: 3923/1000, score: 81, cumulative reward: -523.0621384752258
Episode: 3924/1000, score: 107, cumulative reward: -838.7958235404019
Episode: 3925/1000, score: 60, cumulative reward: -210.35552362975804
Episode: 3926/1000, score: 64, cumulative reward: -525.619849486422
Episode: 3927/1000, score: 58, cumulative reward: -121.99244334061866
Episode: 3928/1000, score: 61, cumulative reward: -229.54480701972165
Episode: 3929/1000, score: 92, cumulative reward: -724.774943824739
Episode: 3930/1000, score: 78, cumulative reward: -218.57215448715993
Episode: 3931/1000, score: 67, cumulative reward: -449.7037266971098
Episode: 3932/1000, score: 86, cumulative reward: -343.6494524726655
Episode: 3933/1000, score: 53, cumulative reward: -410.0094950758222
Episode: 3934/1000, score: 87, cumulative reward: -389.9944804870807
Episode: 3935/1000, score: 83, cumulative reward: -513.4650029902465
Episode: 3936/1000, score: 51, cumulative reward: -99.32245896496913
Episode: 3937/1000, score: 71, cumulative reward: -356.9195567976025
Episode: 3938/1000, score: 67, cumulative reward: -506.961279756419
Episode: 3939/1000, score: 57, cumulative reward: -120.01292587025841
Episode: 3940/1000, score: 75, cumulative reward: -341.4721593693354
Episode: 3941/1000, score: 114, cumulative reward: -511.9550294341564
Episode: 3942/1000, score: 71, cumulative reward: -472.02495214738013
Episode: 3943/1000, score: 54, cumulative reward: -164.69862539387617
Episode: 3944/1000, score: 99, cumulative reward: -357.1602704468109
Episode: 3945/1000, score: 84, cumulative reward: -396.87278635966663
Episode: 3946/1000, score: 109, cumulative reward: -656.2583780832751
Episode: 3947/1000, score: 59, cumulative reward: -193.51431104599232
Episode: 3948/1000, score: 64, cumulative reward: -365.8169508864549
Episode: 3949/1000, score: 59, cumulative reward: -134.0431967254181
Episode: 3950/1000, score: 73, cumulative reward: -437.5260235659072
Episode: 3951/1000, score: 90, cumulative reward: -542.0251171982491
Episode: 3952/1000, score: 65, cumulative reward: -489.3382511402221
Episode: 3953/1000, score: 72, cumulative reward: -450.9870029073982
Episode: 3954/1000, score: 49, cumulative reward: -326.03097826931344
Episode: 3955/1000, score: 70, cumulative reward: -385.94934957448146
Episode: 3956/1000, score: 97, cumulative reward: -655.9244701218705
Episode: 3957/1000, score: 68, cumulative reward: -489.56195497121416
Episode: 3958/1000, score: 87, cumulative reward: -227.22672046706407
Episode: 3959/1000, score: 74, cumulative reward: -514.3808964620581
Episode: 3960/1000, score: 55, cumulative reward: -363.90460770813485
Episode: 3961/1000, score: 63, cumulative reward: -154.4929833852442
Episode: 3962/1000, score: 56, cumulative reward: -402.3402973718367
Episode: 3963/1000, score: 74, cumulative reward: -530.8981848951585
Episode: 3964/1000, score: 70, cumulative reward: -501.22641097111244
Episode: 3965/1000, score: 67, cumulative reward: -232.6765527038897
Episode: 3966/1000, score: 70, cumulative reward: -454.3697662635173
Episode: 3967/1000, score: 84, cumulative reward: -305.9869692401237
Episode: 3968/1000, score: 94, cumulative reward: -409.54910898944536
Episode: 3969/1000, score: 60, cumulative reward: -465.1061495140343
Episode: 3970/1000, score: 81, cumulative reward: -450.4088704788636
Episode: 3971/1000, score: 65, cumulative reward: -437.29955882610176
Episode: 3972/1000, score: 67, cumulative reward: -463.42519303204074
Episode: 3973/1000, score: 55, cumulative reward: -129.21494560957737
Episode: 3974/1000, score: 77, cumulative reward: -383.951888613321
Episode: 3975/1000, score: 67, cumulative reward: -367.6005901621714
Episode: 3976/1000, score: 80, cumulative reward: -556.643446145805
Episode: 3977/1000, score: 90, cumulative reward: -549.5571041257388
Episode: 3978/1000, score: 106, cumulative reward: -413.81470295238097
Episode: 3979/1000, score: 56, cumulative reward: -375.7185837833743
Episode: 3980/1000, score: 88, cumulative reward: -302.738252203857
Episode: 3981/1000, score: 69, cumulative reward: -421.5950901333428
Episode: 3982/1000, score: 77, cumulative reward: -374.6079044363809
Episode: 3983/1000, score: 58, cumulative reward: -183.3939980567062
Episode: 3984/1000, score: 90, cumulative reward: -34.37750784972185
Episode: 3985/1000, score: 67, cumulative reward: -495.47649149378003
Episode: 3986/1000, score: 73, cumulative reward: -612.0927165372855
Episode: 3987/1000, score: 76, cumulative reward: -405.0956271431631
Episode: 3988/1000, score: 68, cumulative reward: -538.2554096799296
Episode: 3989/1000, score: 82, cumulative reward: -445.5009018409463
Episode: 3990/1000, score: 83, cumulative reward: -687.7419104154116
Episode: 3991/1000, score: 72, cumulative reward: -288.80439847259794
Episode: 3992/1000, score: 51, cumulative reward: -218.14244366682578
Episode: 3993/1000, score: 88, cumulative reward: -449.83938746314345
Episode: 3994/1000, score: 65, cumulative reward: -129.74529340822824
Episode: 3995/1000, score: 66, cumulative reward: -272.38688434939604
Episode: 3996/1000, score: 85, cumulative reward: -417.1678325307578
Episode: 3997/1000, score: 54, cumulative reward: -190.59029311017963
Episode: 3998/1000, score: 57, cumulative reward: -337.27379527705205
Episode: 3999/1000, score: 72, cumulative reward: -185.3376743616945
Episode: 4000/1000, score: 56, cumulative reward: -254.1172219176558
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4001/1000, score: 89, cumulative reward: -173.43597369408187
Episode: 4002/1000, score: 70, cumulative reward: -527.6667955290146
Episode: 4003/1000, score: 67, cumulative reward: -275.3325329120637
Episode: 4004/1000, score: 58, cumulative reward: -137.34051216119713
Episode: 4005/1000, score: 58, cumulative reward: -266.93080897974926
Episode: 4006/1000, score: 89, cumulative reward: -334.8143859421797
Episode: 4007/1000, score: 58, cumulative reward: -179.63999846122658
Episode: 4008/1000, score: 52, cumulative reward: -316.33767275725984
Episode: 4009/1000, score: 65, cumulative reward: -401.92621159452
Episode: 4010/1000, score: 56, cumulative reward: -352.68627965103906
Episode: 4011/1000, score: 69, cumulative reward: -549.6649549361815
Episode: 4012/1000, score: 60, cumulative reward: -483.373934171138
Episode: 4013/1000, score: 59, cumulative reward: -157.67893823505176
Episode: 4014/1000, score: 92, cumulative reward: -468.0296446716267
Episode: 4015/1000, score: 76, cumulative reward: -519.0141103339065
Episode: 4016/1000, score: 69, cumulative reward: -232.99352895850058
Episode: 4017/1000, score: 64, cumulative reward: -367.2231092085555
Episode: 4018/1000, score: 74, cumulative reward: -203.000846913268
Episode: 4019/1000, score: 52, cumulative reward: -178.60296690011626
Episode: 4020/1000, score: 64, cumulative reward: -157.34092866175698
Episode: 4021/1000, score: 56, cumulative reward: -204.85372881633825
Episode: 4022/1000, score: 94, cumulative reward: -452.9129390862042
Episode: 4023/1000, score: 81, cumulative reward: -520.7043946421102
Episode: 4024/1000, score: 90, cumulative reward: -180.52189844336516
Episode: 4025/1000, score: 108, cumulative reward: -584.5062378501251
Episode: 4026/1000, score: 85, cumulative reward: -358.6460176761005
Episode: 4027/1000, score: 68, cumulative reward: -497.8815675793974
Episode: 4028/1000, score: 50, cumulative reward: -170.89504939226276
Episode: 4029/1000, score: 75, cumulative reward: -523.0210715565053
Episode: 4030/1000, score: 93, cumulative reward: -642.3791750306892
Episode: 4031/1000, score: 70, cumulative reward: -383.0177331277741
Episode: 4032/1000, score: 65, cumulative reward: -209.75382869630243
Episode: 4033/1000, score: 81, cumulative reward: -317.5162928696575
Episode: 4034/1000, score: 52, cumulative reward: -255.8442162261367
Episode: 4035/1000, score: 56, cumulative reward: -100.46674434258105
Episode: 4036/1000, score: 85, cumulative reward: -493.5562123732111
Episode: 4037/1000, score: 98, cumulative reward: -18.58220724371762
Episode: 4038/1000, score: 57, cumulative reward: -247.2501267946218
Episode: 4039/1000, score: 76, cumulative reward: -249.0351339890398
Episode: 4040/1000, score: 61, cumulative reward: -254.05446044834179
Episode: 4041/1000, score: 65, cumulative reward: -296.20831285537724
Episode: 4042/1000, score: 77, cumulative reward: -286.4715736733435
Episode: 4043/1000, score: 72, cumulative reward: -185.43736820628038
Episode: 4044/1000, score: 106, cumulative reward: -224.41886307902044
Episode: 4045/1000, score: 67, cumulative reward: -244.37369651593298
Episode: 4046/1000, score: 96, cumulative reward: -464.8721271060341
Episode: 4047/1000, score: 54, cumulative reward: -104.7768061018844
Episode: 4048/1000, score: 65, cumulative reward: -186.8426403628618
Episode: 4049/1000, score: 89, cumulative reward: -613.0026345729075
Episode: 4050/1000, score: 110, cumulative reward: -309.1473856509508
Episode: 4051/1000, score: 89, cumulative reward: -416.0588709601427
Episode: 4052/1000, score: 57, cumulative reward: -298.2773880929729
Episode: 4053/1000, score: 94, cumulative reward: -238.6250360066071
Episode: 4054/1000, score: 59, cumulative reward: -357.86560940724837
Episode: 4055/1000, score: 71, cumulative reward: -19.868368373526735
Episode: 4056/1000, score: 92, cumulative reward: -253.54371029232183
Episode: 4057/1000, score: 89, cumulative reward: -317.62194201962507
Episode: 4058/1000, score: 89, cumulative reward: -261.15675624474125
Episode: 4059/1000, score: 94, cumulative reward: -327.9256260150322
Episode: 4060/1000, score: 81, cumulative reward: -429.4729353715188
Episode: 4061/1000, score: 63, cumulative reward: -150.46861218293915
Episode: 4062/1000, score: 88, cumulative reward: -128.73254720903145
Episode: 4063/1000, score: 94, cumulative reward: -509.5434229714369
Episode: 4064/1000, score: 58, cumulative reward: -234.63314012047792
Episode: 4065/1000, score: 89, cumulative reward: -109.15323738261426
Episode: 4066/1000, score: 76, cumulative reward: -142.48665486679656
Episode: 4067/1000, score: 57, cumulative reward: -193.99366894640048
Episode: 4068/1000, score: 65, cumulative reward: -140.84045443209897
Episode: 4069/1000, score: 54, cumulative reward: -263.69309141501896
Episode: 4070/1000, score: 53, cumulative reward: -304.24487778749153
Episode: 4071/1000, score: 95, cumulative reward: -191.68201856701242
Episode: 4072/1000, score: 76, cumulative reward: -140.79386075704753
Episode: 4073/1000, score: 82, cumulative reward: -245.57976005284755
Episode: 4074/1000, score: 72, cumulative reward: -183.23040407075675
Episode: 4075/1000, score: 86, cumulative reward: -362.6579123694634
Episode: 4076/1000, score: 66, cumulative reward: -186.84550999747938
Episode: 4077/1000, score: 79, cumulative reward: -176.5720032337607
Episode: 4078/1000, score: 62, cumulative reward: -296.41861174593436
Episode: 4079/1000, score: 79, cumulative reward: -135.42769864593097
Episode: 4080/1000, score: 84, cumulative reward: -192.55292170416914
Episode: 4081/1000, score: 85, cumulative reward: -18.91402926529051
Episode: 4082/1000, score: 54, cumulative reward: -194.8294028406586
Episode: 4083/1000, score: 69, cumulative reward: -211.7911761435579
Episode: 4084/1000, score: 79, cumulative reward: -147.14896742663151
Episode: 4085/1000, score: 98, cumulative reward: -239.56239030978207
Episode: 4086/1000, score: 77, cumulative reward: -361.73919054473595
Episode: 4087/1000, score: 63, cumulative reward: -278.64408285542976
Episode: 4088/1000, score: 52, cumulative reward: -240.39634332027026
Episode: 4089/1000, score: 95, cumulative reward: -267.0292673713235
Episode: 4090/1000, score: 89, cumulative reward: -51.9694035202636
Episode: 4091/1000, score: 52, cumulative reward: -351.4148875824455
Episode: 4092/1000, score: 57, cumulative reward: -408.66389410512073
Episode: 4093/1000, score: 100, cumulative reward: -518.9091638447755
Episode: 4094/1000, score: 65, cumulative reward: -131.84912171508586
Episode: 4095/1000, score: 66, cumulative reward: -159.27075810521987
Episode: 4096/1000, score: 77, cumulative reward: -306.746054050653
Episode: 4097/1000, score: 66, cumulative reward: -129.499299353176
Episode: 4098/1000, score: 78, cumulative reward: -559.270968010286
Episode: 4099/1000, score: 72, cumulative reward: -513.1695825803208
Episode: 4100/1000, score: 90, cumulative reward: -522.7938299824799
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4101/1000, score: 113, cumulative reward: -449.61903719005073
Episode: 4102/1000, score: 75, cumulative reward: -194.5373105848721
Episode: 4103/1000, score: 91, cumulative reward: -141.50008300451776
Episode: 4104/1000, score: 118, cumulative reward: -972.7645952748504
Episode: 4105/1000, score: 107, cumulative reward: -628.8175054473595
Episode: 4106/1000, score: 72, cumulative reward: -282.4124455014172
Episode: 4107/1000, score: 78, cumulative reward: -180.51302463480312
Episode: 4108/1000, score: 57, cumulative reward: -303.3771751619142
Episode: 4109/1000, score: 52, cumulative reward: -246.95388482278437
Episode: 4110/1000, score: 59, cumulative reward: -351.45284625834404
Episode: 4111/1000, score: 78, cumulative reward: -170.0202301391754
Episode: 4112/1000, score: 69, cumulative reward: -222.52085121194548
Episode: 4113/1000, score: 68, cumulative reward: -138.35010485542057
Episode: 4114/1000, score: 55, cumulative reward: -360.09169622634215
Episode: 4115/1000, score: 131, cumulative reward: -758.7424064298515
Episode: 4116/1000, score: 72, cumulative reward: -262.2233426401616
Episode: 4117/1000, score: 56, cumulative reward: -305.1133626770435
Episode: 4118/1000, score: 61, cumulative reward: -92.24645941043498
Episode: 4119/1000, score: 110, cumulative reward: -26.2530430410677
Episode: 4120/1000, score: 60, cumulative reward: -252.2291527875877
Episode: 4121/1000, score: 60, cumulative reward: -302.94014562912673
Episode: 4122/1000, score: 70, cumulative reward: -391.5471500112737
Episode: 4123/1000, score: 105, cumulative reward: -553.7869475052238
Episode: 4124/1000, score: 80, cumulative reward: -270.1260017378898
Episode: 4125/1000, score: 60, cumulative reward: -212.61242102716693
Episode: 4126/1000, score: 74, cumulative reward: -143.54296672550782
Episode: 4127/1000, score: 56, cumulative reward: -236.92358225121558
Episode: 4128/1000, score: 63, cumulative reward: -248.9698251503452
Episode: 4129/1000, score: 86, cumulative reward: -373.7301001101425
Episode: 4130/1000, score: 91, cumulative reward: -182.91330930733736
Episode: 4131/1000, score: 66, cumulative reward: -231.87518429225196
Episode: 4132/1000, score: 62, cumulative reward: -128.10596113710244
Episode: 4133/1000, score: 72, cumulative reward: -380.2402432420557
Episode: 4134/1000, score: 88, cumulative reward: -294.5723835932645
Episode: 4135/1000, score: 73, cumulative reward: -318.1088565985034
Episode: 4136/1000, score: 64, cumulative reward: -291.58802506542236
Episode: 4137/1000, score: 84, cumulative reward: -551.2887540255351
Episode: 4138/1000, score: 136, cumulative reward: -1079.9372217994096
Episode: 4139/1000, score: 81, cumulative reward: -112.46649231376679
Episode: 4140/1000, score: 55, cumulative reward: -171.00399842235078
Episode: 4141/1000, score: 74, cumulative reward: -54.10677375185557
Episode: 4142/1000, score: 117, cumulative reward: -530.7819271871529
Episode: 4143/1000, score: 87, cumulative reward: -388.72886728903467
Episode: 4144/1000, score: 51, cumulative reward: -118.09795826316366
Episode: 4145/1000, score: 74, cumulative reward: -44.37683640362219
Episode: 4146/1000, score: 62, cumulative reward: -212.58047213674195
Episode: 4147/1000, score: 58, cumulative reward: -223.49497125220933
Episode: 4148/1000, score: 87, cumulative reward: -379.9157980123259
Episode: 4149/1000, score: 116, cumulative reward: -126.79221215027619
Episode: 4150/1000, score: 87, cumulative reward: -442.8710209684543
Episode: 4151/1000, score: 68, cumulative reward: -392.6473679692814
Episode: 4152/1000, score: 58, cumulative reward: -207.0049476272498
Episode: 4153/1000, score: 70, cumulative reward: -285.0366085730789
Episode: 4154/1000, score: 85, cumulative reward: -486.3198583415986
Episode: 4155/1000, score: 53, cumulative reward: -100.42051776400164
Episode: 4156/1000, score: 65, cumulative reward: -289.4970474183258
Episode: 4157/1000, score: 73, cumulative reward: -224.88082629634658
Episode: 4158/1000, score: 70, cumulative reward: -302.8168125212954
Episode: 4159/1000, score: 103, cumulative reward: -266.5788851213945
Episode: 4160/1000, score: 80, cumulative reward: -627.9723675366117
Episode: 4161/1000, score: 104, cumulative reward: -314.71697417360076
Episode: 4162/1000, score: 77, cumulative reward: 13.324980049530708
Episode: 4163/1000, score: 76, cumulative reward: -254.9114282695933
Episode: 4164/1000, score: 117, cumulative reward: -794.6654926044355
Episode: 4165/1000, score: 147, cumulative reward: -612.6887444576533
Episode: 4166/1000, score: 148, cumulative reward: -708.1182694167295
Episode: 4167/1000, score: 104, cumulative reward: -286.57540336096395
Episode: 4168/1000, score: 100, cumulative reward: -159.63463436082134
Episode: 4169/1000, score: 75, cumulative reward: -558.3274068197643
Episode: 4170/1000, score: 52, cumulative reward: -223.81761166895103
Episode: 4171/1000, score: 84, cumulative reward: -215.61278561937416
Episode: 4172/1000, score: 57, cumulative reward: -246.94039659641487
Episode: 4173/1000, score: 55, cumulative reward: -104.5249618921149
Episode: 4174/1000, score: 100, cumulative reward: -367.0238993937659
Episode: 4175/1000, score: 101, cumulative reward: -164.53144042070764
Episode: 4176/1000, score: 51, cumulative reward: -194.53170054855082
Episode: 4177/1000, score: 91, cumulative reward: -663.0870241790979
Episode: 4178/1000, score: 65, cumulative reward: -388.94709938774605
Episode: 4179/1000, score: 334, cumulative reward: 17.02705624607823
Episode: 4180/1000, score: 69, cumulative reward: -251.47813997690818
Episode: 4181/1000, score: 65, cumulative reward: -248.9273379801017
Episode: 4182/1000, score: 69, cumulative reward: -3.7758730681702417
Episode: 4183/1000, score: 67, cumulative reward: -227.0123557335043
Episode: 4184/1000, score: 83, cumulative reward: -262.0366259921201
Episode: 4185/1000, score: 95, cumulative reward: -162.6611405953745
Episode: 4186/1000, score: 102, cumulative reward: -293.33709958521854
Episode: 4187/1000, score: 55, cumulative reward: -289.46455096490064
Episode: 4188/1000, score: 74, cumulative reward: -480.1942437593106
Episode: 4189/1000, score: 89, cumulative reward: -264.63632700529257
Episode: 4190/1000, score: 70, cumulative reward: -476.45767061332185
Episode: 4191/1000, score: 53, cumulative reward: -267.02002219117395
Episode: 4192/1000, score: 77, cumulative reward: -243.08589076850726
Episode: 4193/1000, score: 109, cumulative reward: -283.5504912070394
Episode: 4194/1000, score: 60, cumulative reward: -239.88137877003848
Episode: 4195/1000, score: 52, cumulative reward: -266.4741205927879
Episode: 4196/1000, score: 54, cumulative reward: -234.76107710680287
Episode: 4197/1000, score: 57, cumulative reward: -141.28680316038773
Episode: 4198/1000, score: 85, cumulative reward: -95.78983264315977
Episode: 4199/1000, score: 58, cumulative reward: -324.4348761925188
Episode: 4200/1000, score: 64, cumulative reward: -193.37094125987602
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4201/1000, score: 79, cumulative reward: -46.78573894775442
Episode: 4202/1000, score: 65, cumulative reward: -193.11180916383023
Episode: 4203/1000, score: 69, cumulative reward: -407.78517015605195
Episode: 4204/1000, score: 92, cumulative reward: -290.2811297457565
Episode: 4205/1000, score: 157, cumulative reward: -362.7821187742724
Episode: 4206/1000, score: 74, cumulative reward: -249.52596844507656
Episode: 4207/1000, score: 57, cumulative reward: -325.6986378212922
Episode: 4208/1000, score: 73, cumulative reward: -494.4312348740595
Episode: 4209/1000, score: 64, cumulative reward: -215.82925233476882
Episode: 4210/1000, score: 62, cumulative reward: -284.0150735383788
Episode: 4211/1000, score: 61, cumulative reward: -360.8056286783506
Episode: 4212/1000, score: 62, cumulative reward: -222.8748668218994
Episode: 4213/1000, score: 89, cumulative reward: -336.5095187104732
Episode: 4214/1000, score: 70, cumulative reward: -406.8058421886451
Episode: 4215/1000, score: 87, cumulative reward: -33.52811988085794
Episode: 4216/1000, score: 129, cumulative reward: -392.8173031858999
Episode: 4217/1000, score: 50, cumulative reward: -102.42214568714265
Episode: 4218/1000, score: 87, cumulative reward: -150.28926639786135
Episode: 4219/1000, score: 57, cumulative reward: -414.43877955467707
Episode: 4220/1000, score: 77, cumulative reward: -278.66869680799255
Episode: 4221/1000, score: 93, cumulative reward: -288.86302352161067
Episode: 4222/1000, score: 122, cumulative reward: -523.8139432663031
Episode: 4223/1000, score: 57, cumulative reward: -254.69760839176539
Episode: 4224/1000, score: 105, cumulative reward: -109.39204779822225
Episode: 4225/1000, score: 56, cumulative reward: -135.95397992361626
Episode: 4226/1000, score: 89, cumulative reward: -199.3383444465125
Episode: 4227/1000, score: 83, cumulative reward: -585.6343342376942
Episode: 4228/1000, score: 123, cumulative reward: -27.8866459742663
Episode: 4229/1000, score: 91, cumulative reward: -29.706838157864993
Episode: 4230/1000, score: 116, cumulative reward: -588.852977319227
Episode: 4231/1000, score: 97, cumulative reward: -261.4740537171841
Episode: 4232/1000, score: 62, cumulative reward: -306.3937953258056
Episode: 4233/1000, score: 81, cumulative reward: -170.90668621790311
Episode: 4234/1000, score: 81, cumulative reward: -123.13782141284452
Episode: 4235/1000, score: 155, cumulative reward: 49.03379208606981
Episode: 4236/1000, score: 66, cumulative reward: -159.91902288659355
Episode: 4237/1000, score: 70, cumulative reward: -165.1133417016241
Episode: 4238/1000, score: 80, cumulative reward: -108.96227110075955
Episode: 4239/1000, score: 51, cumulative reward: -182.4653866748106
Episode: 4240/1000, score: 74, cumulative reward: -142.41308139721832
Episode: 4241/1000, score: 54, cumulative reward: -120.90746885714086
Episode: 4242/1000, score: 143, cumulative reward: -225.38009828339673
Episode: 4243/1000, score: 76, cumulative reward: -372.1851259415254
Episode: 4244/1000, score: 66, cumulative reward: -193.02959679981262
Episode: 4245/1000, score: 95, cumulative reward: -51.15523678669131
Episode: 4246/1000, score: 85, cumulative reward: -195.81441402512024
Episode: 4247/1000, score: 132, cumulative reward: -211.82598613885594
Episode: 4248/1000, score: 78, cumulative reward: -78.42470963269909
Episode: 4249/1000, score: 77, cumulative reward: -149.39353935121227
Episode: 4250/1000, score: 75, cumulative reward: -126.44391366317178
Episode: 4251/1000, score: 73, cumulative reward: -122.39590827247247
Episode: 4252/1000, score: 91, cumulative reward: -195.48502891295072
Episode: 4253/1000, score: 102, cumulative reward: -151.47248443058908
Episode: 4254/1000, score: 87, cumulative reward: -173.04755692838805
Episode: 4255/1000, score: 56, cumulative reward: -146.61786756437016
Episode: 4256/1000, score: 57, cumulative reward: -67.99339559193231
Episode: 4257/1000, score: 85, cumulative reward: -122.7233505432636
Episode: 4258/1000, score: 64, cumulative reward: -165.3511573355672
Episode: 4259/1000, score: 72, cumulative reward: -131.4739848273927
Episode: 4260/1000, score: 49, cumulative reward: -146.40905705385813
Episode: 4261/1000, score: 60, cumulative reward: -102.87013094886247
Episode: 4262/1000, score: 114, cumulative reward: -132.46003914312956
Episode: 4263/1000, score: 78, cumulative reward: -181.497257941645
Episode: 4264/1000, score: 67, cumulative reward: -171.35780269587457
Episode: 4265/1000, score: 119, cumulative reward: 19.408965656915996
Episode: 4266/1000, score: 92, cumulative reward: -151.46737971033036
Episode: 4267/1000, score: 73, cumulative reward: -121.7890349098231
Episode: 4268/1000, score: 57, cumulative reward: -169.63228722968842
Episode: 4269/1000, score: 81, cumulative reward: -353.72045490219887
Episode: 4270/1000, score: 69, cumulative reward: -258.5407966278145
Episode: 4271/1000, score: 78, cumulative reward: -443.00636563340794
Episode: 4272/1000, score: 69, cumulative reward: -523.461448201321
Episode: 4273/1000, score: 63, cumulative reward: -208.5536752963907
Episode: 4274/1000, score: 134, cumulative reward: -246.75156052306025
Episode: 4275/1000, score: 83, cumulative reward: -320.4690010495284
Episode: 4276/1000, score: 179, cumulative reward: 25.69303406479652
Episode: 4277/1000, score: 59, cumulative reward: -110.09027342606839
Episode: 4278/1000, score: 96, cumulative reward: -290.44305550535125
Episode: 4279/1000, score: 77, cumulative reward: -338.58463466063847
Episode: 4280/1000, score: 58, cumulative reward: -443.45085274359354
Episode: 4281/1000, score: 70, cumulative reward: -680.7368522245185
Episode: 4282/1000, score: 70, cumulative reward: -129.3451930828113
Episode: 4283/1000, score: 89, cumulative reward: -267.9563153388787
Episode: 4284/1000, score: 175, cumulative reward: -43.80863471993119
Episode: 4285/1000, score: 51, cumulative reward: -233.47750251702746
Episode: 4286/1000, score: 120, cumulative reward: -29.30408011824302
Episode: 4287/1000, score: 62, cumulative reward: -108.41675849681769
Episode: 4288/1000, score: 105, cumulative reward: -191.88742879564302
Episode: 4289/1000, score: 108, cumulative reward: -317.89810673488853
Episode: 4290/1000, score: 71, cumulative reward: -25.433765469910384
Episode: 4291/1000, score: 68, cumulative reward: -253.20500619182124
Episode: 4292/1000, score: 132, cumulative reward: -147.13118038465217
Episode: 4293/1000, score: 68, cumulative reward: -298.50432737183155
Episode: 4294/1000, score: 49, cumulative reward: -244.19811418629203
Episode: 4295/1000, score: 143, cumulative reward: -86.27263336023354
Episode: 4296/1000, score: 73, cumulative reward: -99.19752993395755
Episode: 4297/1000, score: 174, cumulative reward: -158.4321381380053
Episode: 4298/1000, score: 166, cumulative reward: -167.83330487680922
Episode: 4299/1000, score: 92, cumulative reward: -208.59097616604132
Episode: 4300/1000, score: 92, cumulative reward: -136.64832313692196
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4301/1000, score: 71, cumulative reward: -30.615416860940698
Episode: 4302/1000, score: 141, cumulative reward: -39.87033978524217
Episode: 4303/1000, score: 58, cumulative reward: -75.94249335040888
Episode: 4304/1000, score: 56, cumulative reward: -275.27566521100323
Episode: 4305/1000, score: 63, cumulative reward: -121.6538742473653
Episode: 4306/1000, score: 89, cumulative reward: -174.05066315439427
Episode: 4307/1000, score: 112, cumulative reward: -95.88993927236217
Episode: 4308/1000, score: 56, cumulative reward: -128.0102111521722
Episode: 4309/1000, score: 121, cumulative reward: -63.48545430371271
Episode: 4310/1000, score: 95, cumulative reward: -95.89069126418022
Episode: 4311/1000, score: 63, cumulative reward: -137.2731842158857
Episode: 4312/1000, score: 79, cumulative reward: -136.97647596268442
Episode: 4313/1000, score: 61, cumulative reward: -116.24289004795321
Episode: 4314/1000, score: 170, cumulative reward: -454.67599707603677
Episode: 4315/1000, score: 68, cumulative reward: -61.26931586729399
Episode: 4316/1000, score: 70, cumulative reward: -250.22181726329478
Episode: 4317/1000, score: 95, cumulative reward: -131.31615923005273
Episode: 4318/1000, score: 89, cumulative reward: -115.3531567541601
Episode: 4319/1000, score: 82, cumulative reward: -100.32534965078489
Episode: 4320/1000, score: 128, cumulative reward: -152.2722304529742
Episode: 4321/1000, score: 88, cumulative reward: -91.79692081207907
Episode: 4322/1000, score: 75, cumulative reward: -85.42812401762149
Episode: 4323/1000, score: 103, cumulative reward: -82.96794847978754
Episode: 4324/1000, score: 50, cumulative reward: -62.09663026426553
Episode: 4325/1000, score: 83, cumulative reward: -33.53689485265704
Episode: 4326/1000, score: 140, cumulative reward: -595.3454862789508
Episode: 4327/1000, score: 124, cumulative reward: -737.6170011053733
Episode: 4328/1000, score: 116, cumulative reward: -715.8301081836332
Episode: 4329/1000, score: 91, cumulative reward: -582.135166899791
Episode: 4330/1000, score: 77, cumulative reward: -701.9339428892
Episode: 4331/1000, score: 128, cumulative reward: -540.1340669731401
Episode: 4332/1000, score: 83, cumulative reward: -307.32881398792847
Episode: 4333/1000, score: 91, cumulative reward: -541.8400963917811
Episode: 4334/1000, score: 112, cumulative reward: -198.7613784437043
Episode: 4335/1000, score: 238, cumulative reward: -300.56383346806945
Episode: 4336/1000, score: 82, cumulative reward: -317.1196052913825
Episode: 4337/1000, score: 119, cumulative reward: -463.76552243528164
Episode: 4338/1000, score: 232, cumulative reward: -152.0644010804947
Episode: 4339/1000, score: 88, cumulative reward: -44.57127150555769
Episode: 4340/1000, score: 100, cumulative reward: -344.10841593571365
Episode: 4341/1000, score: 130, cumulative reward: -237.58587134826695
Episode: 4342/1000, score: 145, cumulative reward: -884.0394122501122
Episode: 4343/1000, score: 125, cumulative reward: -1043.7299329085922
Episode: 4344/1000, score: 83, cumulative reward: -341.7263571397442
Episode: 4345/1000, score: 310, cumulative reward: -878.6965750450275
Episode: 4346/1000, score: 81, cumulative reward: -584.3632197820278
Episode: 4347/1000, score: 207, cumulative reward: -290.33578833409587
Episode: 4348/1000, score: 105, cumulative reward: -647.8597139742651
Episode: 4349/1000, score: 101, cumulative reward: -577.2429006206772
Episode: 4350/1000, score: 203, cumulative reward: -1185.3260717245694
Episode: 4351/1000, score: 115, cumulative reward: -630.4705585859401
Episode: 4352/1000, score: 99, cumulative reward: -503.6456628351096
Episode: 4353/1000, score: 130, cumulative reward: -403.56348269874036
Episode: 4354/1000, score: 168, cumulative reward: -614.3722989847554
Episode: 4355/1000, score: 111, cumulative reward: -710.186499248866
Episode: 4356/1000, score: 73, cumulative reward: -33.630818461677634
Episode: 4357/1000, score: 82, cumulative reward: -288.59363954771527
Episode: 4358/1000, score: 92, cumulative reward: -453.58843482007285
Episode: 4359/1000, score: 98, cumulative reward: -391.202068486306
Episode: 4360/1000, score: 69, cumulative reward: -116.30142246629013
Episode: 4361/1000, score: 168, cumulative reward: -836.4075906795618
Episode: 4362/1000, score: 174, cumulative reward: -919.5825099138935
Episode: 4363/1000, score: 177, cumulative reward: -475.25212396360007
Episode: 4364/1000, score: 130, cumulative reward: -148.7306640872677
Episode: 4365/1000, score: 126, cumulative reward: -649.1510936812971
Episode: 4366/1000, score: 128, cumulative reward: -839.8305762773313
Episode: 4367/1000, score: 147, cumulative reward: -672.6501510137118
Episode: 4368/1000, score: 115, cumulative reward: -268.8766365339902
Episode: 4369/1000, score: 125, cumulative reward: -156.60802812935373
Episode: 4370/1000, score: 106, cumulative reward: -703.2929671462181
Episode: 4371/1000, score: 158, cumulative reward: -479.22528279814975
Episode: 4372/1000, score: 87, cumulative reward: -547.4614940745402
Episode: 4373/1000, score: 144, cumulative reward: -463.39008870845305
Episode: 4374/1000, score: 279, cumulative reward: -300.1656549782715
Episode: 4375/1000, score: 162, cumulative reward: -248.52653082753477
Episode: 4376/1000, score: 124, cumulative reward: -350.03710644968237
Episode: 4377/1000, score: 107, cumulative reward: -334.96809097524635
Episode: 4378/1000, score: 95, cumulative reward: -468.4237729082579
Episode: 4379/1000, score: 162, cumulative reward: -501.68431950047676
Episode: 4380/1000, score: 108, cumulative reward: -421.80523427009285
Episode: 4381/1000, score: 126, cumulative reward: -313.0926040086413
Episode: 4382/1000, score: 145, cumulative reward: -963.5421417398901
Episode: 4383/1000, score: 119, cumulative reward: -647.6268375987975
Episode: 4384/1000, score: 136, cumulative reward: -609.5988797870118
Episode: 4385/1000, score: 130, cumulative reward: -402.1772219796577
Episode: 4386/1000, score: 125, cumulative reward: -313.76408484442265
Episode: 4387/1000, score: 114, cumulative reward: -319.1592746773953
Episode: 4388/1000, score: 172, cumulative reward: -768.776204552425
Episode: 4389/1000, score: 123, cumulative reward: -351.2947245839297
Episode: 4390/1000, score: 215, cumulative reward: -847.3116919265493
Episode: 4391/1000, score: 235, cumulative reward: -746.3167133170928
Episode: 4392/1000, score: 112, cumulative reward: -704.3154656849761
Episode: 4393/1000, score: 151, cumulative reward: -622.4811455336368
Episode: 4394/1000, score: 148, cumulative reward: -549.2205755995984
Episode: 4395/1000, score: 135, cumulative reward: -586.0050681411874
Episode: 4396/1000, score: 200, cumulative reward: -612.3685484152885
Episode: 4397/1000, score: 148, cumulative reward: -435.4435269974435
Episode: 4398/1000, score: 104, cumulative reward: -446.15201734526124
Episode: 4399/1000, score: 113, cumulative reward: -286.50962000365115
Episode: 4400/1000, score: 104, cumulative reward: -311.51059709764917
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4401/1000, score: 230, cumulative reward: -591.6326265759811
Episode: 4402/1000, score: 125, cumulative reward: -375.4503797809721
Episode: 4403/1000, score: 92, cumulative reward: -420.3329785436665
Episode: 4404/1000, score: 127, cumulative reward: -267.29404937177463
Episode: 4405/1000, score: 167, cumulative reward: -610.6262401172421
Episode: 4406/1000, score: 163, cumulative reward: -402.0241385973297
Episode: 4407/1000, score: 223, cumulative reward: -534.7840205594773
Episode: 4408/1000, score: 169, cumulative reward: -379.0942289702461
Episode: 4409/1000, score: 137, cumulative reward: -576.7108770776827
Episode: 4410/1000, score: 147, cumulative reward: -598.5742191685879
Episode: 4411/1000, score: 98, cumulative reward: -497.5762729744197
Episode: 4412/1000, score: 92, cumulative reward: -423.7698453639285
Episode: 4413/1000, score: 81, cumulative reward: -327.73844052225616
Episode: 4414/1000, score: 163, cumulative reward: -383.7628826528727
Episode: 4415/1000, score: 165, cumulative reward: -253.4758941631588
Episode: 4416/1000, score: 154, cumulative reward: -478.8555594666014
Episode: 4417/1000, score: 110, cumulative reward: -428.70281639120026
Episode: 4418/1000, score: 77, cumulative reward: -518.3176785392047
Episode: 4419/1000, score: 130, cumulative reward: -438.1464686164997
Episode: 4420/1000, score: 282, cumulative reward: -482.4847407665913
Episode: 4421/1000, score: 73, cumulative reward: -368.4916666657376
Episode: 4422/1000, score: 113, cumulative reward: -269.45215888159953
Episode: 4423/1000, score: 82, cumulative reward: -481.0970605273561
Episode: 4424/1000, score: 218, cumulative reward: -660.7117880978359
Episode: 4425/1000, score: 144, cumulative reward: -585.6846918706356
Episode: 4426/1000, score: 80, cumulative reward: -486.1015217807305
Episode: 4427/1000, score: 104, cumulative reward: -246.1590311651723
Episode: 4428/1000, score: 112, cumulative reward: -456.7017535217433
Episode: 4429/1000, score: 138, cumulative reward: -424.1008667726652
Episode: 4430/1000, score: 103, cumulative reward: -378.7731631282686
Episode: 4431/1000, score: 88, cumulative reward: -218.76135735497053
Episode: 4432/1000, score: 134, cumulative reward: -185.32796415248936
Episode: 4433/1000, score: 136, cumulative reward: -210.37641414143175
Episode: 4434/1000, score: 92, cumulative reward: -393.2769120041411
Episode: 4435/1000, score: 72, cumulative reward: -428.071007599458
Episode: 4436/1000, score: 118, cumulative reward: -328.00318585382615
Episode: 4437/1000, score: 160, cumulative reward: -670.7249075097652
Episode: 4438/1000, score: 292, cumulative reward: -354.47458639100034
Episode: 4439/1000, score: 220, cumulative reward: -529.229065873229
Episode: 4440/1000, score: 100, cumulative reward: -549.7537158236694
Episode: 4441/1000, score: 109, cumulative reward: -392.2186521897726
Episode: 4442/1000, score: 108, cumulative reward: -383.92056301830644
Episode: 4443/1000, score: 129, cumulative reward: -225.51037474787347
Episode: 4444/1000, score: 319, cumulative reward: -217.51773742644096
Episode: 4445/1000, score: 133, cumulative reward: -203.54944300864028
Episode: 4446/1000, score: 134, cumulative reward: -295.33271882206236
Episode: 4447/1000, score: 104, cumulative reward: -267.80101910964726
Episode: 4448/1000, score: 126, cumulative reward: -297.78768349094776
Episode: 4449/1000, score: 208, cumulative reward: -271.8101671772194
Episode: 4450/1000, score: 132, cumulative reward: -269.75564612666886
Episode: 4451/1000, score: 279, cumulative reward: -453.1510648006539
Episode: 4452/1000, score: 338, cumulative reward: -241.49846040030877
Episode: 4453/1000, score: 239, cumulative reward: -184.33052223693483
Episode: 4454/1000, score: 155, cumulative reward: -292.9961018042791
Episode: 4455/1000, score: 152, cumulative reward: -400.9345241802744
Episode: 4456/1000, score: 181, cumulative reward: -129.6733055634295
Episode: 4457/1000, score: 91, cumulative reward: -280.5109842629521
Episode: 4458/1000, score: 177, cumulative reward: -285.32111241476684
Episode: 4459/1000, score: 203, cumulative reward: -270.7679826637333
Episode: 4460/1000, score: 187, cumulative reward: -156.70803037228342
Episode: 4461/1000, score: 148, cumulative reward: -180.98450968776052
Episode: 4462/1000, score: 373, cumulative reward: -730.8613798287965
Episode: 4463/1000, score: 447, cumulative reward: -269.93400804935686
Episode: 4464/1000, score: 99, cumulative reward: -367.1074288291462
Episode: 4465/1000, score: 352, cumulative reward: -522.661705442041
Episode: 4466/1000, score: 247, cumulative reward: -290.47751327715537
Episode: 4467/1000, score: 200, cumulative reward: -363.7968129593482
Episode: 4468/1000, score: 102, cumulative reward: -293.8542915938722
Episode: 4469/1000, score: 136, cumulative reward: -356.8181584760145
Episode: 4470/1000, score: 227, cumulative reward: -408.56055540259786
Episode: 4471/1000, score: 127, cumulative reward: -260.81915050195084
Episode: 4472/1000, score: 662, cumulative reward: -461.9634173828028
Episode: 4473/1000, score: 119, cumulative reward: -271.76103525211886
Episode: 4474/1000, score: 145, cumulative reward: -307.75723515344976
Episode: 4475/1000, score: 358, cumulative reward: -172.82775833058224
Episode: 4476/1000, score: 412, cumulative reward: -473.4835710582586
Episode: 4477/1000, score: 253, cumulative reward: -329.5814625495444
Episode: 4478/1000, score: 252, cumulative reward: -334.7120327350332
Episode: 4479/1000, score: 288, cumulative reward: -414.05864565700665
Episode: 4480/1000, score: 132, cumulative reward: -459.01482106151053
Episode: 4481/1000, score: 147, cumulative reward: -391.9342783268562
Episode: 4482/1000, score: 81, cumulative reward: -13.516593894406952
Episode: 4483/1000, score: 101, cumulative reward: -6.4229114943047705
Episode: 4484/1000, score: 111, cumulative reward: -239.34289787999194
Episode: 4485/1000, score: 153, cumulative reward: -351.051574331946
Episode: 4486/1000, score: 230, cumulative reward: -403.4580291396041
Episode: 4487/1000, score: 197, cumulative reward: -236.89140331335688
Episode: 4488/1000, score: 116, cumulative reward: -86.05742068159334
Episode: 4489/1000, score: 138, cumulative reward: -225.6593227917028
Episode: 4490/1000, score: 169, cumulative reward: -690.3091491594117
Episode: 4491/1000, score: 127, cumulative reward: -332.07475280281017
Episode: 4492/1000, score: 147, cumulative reward: -243.45335940837384
Episode: 4493/1000, score: 141, cumulative reward: -283.3456091351195
Episode: 4494/1000, score: 58, cumulative reward: -479.54621460099986
Episode: 4495/1000, score: 91, cumulative reward: -216.01200725288825
Episode: 4496/1000, score: 61, cumulative reward: -331.41886418377806
Episode: 4497/1000, score: 78, cumulative reward: -513.0860103559849
Episode: 4498/1000, score: 54, cumulative reward: -223.42546197131136
Episode: 4499/1000, score: 57, cumulative reward: -175.21173677760413
Episode: 4500/1000, score: 60, cumulative reward: -96.71377888076411
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4501/1000, score: 79, cumulative reward: -235.04712892457775
Episode: 4502/1000, score: 82, cumulative reward: -648.1648050546803
Episode: 4503/1000, score: 79, cumulative reward: -579.3511133922077
Episode: 4504/1000, score: 104, cumulative reward: -791.2671163130857
Episode: 4505/1000, score: 118, cumulative reward: -272.8220801918718
Episode: 4506/1000, score: 53, cumulative reward: -203.9327954835053
Episode: 4507/1000, score: 85, cumulative reward: -216.13996116624142
Episode: 4508/1000, score: 123, cumulative reward: -228.55611192795416
Episode: 4509/1000, score: 100, cumulative reward: -128.15122317174377
Episode: 4510/1000, score: 77, cumulative reward: -744.4784858581713
Episode: 4511/1000, score: 75, cumulative reward: -335.81443563883886
Episode: 4512/1000, score: 58, cumulative reward: -221.812502763788
Episode: 4513/1000, score: 114, cumulative reward: -213.90742324099614
Episode: 4514/1000, score: 72, cumulative reward: 8.138173861571701
Episode: 4515/1000, score: 74, cumulative reward: -192.37664759499694
Episode: 4516/1000, score: 82, cumulative reward: -338.2494648893222
Episode: 4517/1000, score: 93, cumulative reward: -113.33749147138198
Episode: 4518/1000, score: 99, cumulative reward: -4.144086967995861
Episode: 4519/1000, score: 101, cumulative reward: -157.6981150878426
Episode: 4520/1000, score: 94, cumulative reward: -116.11412395810467
Episode: 4521/1000, score: 96, cumulative reward: -496.7649765146394
Episode: 4522/1000, score: 117, cumulative reward: -292.57446802067466
Episode: 4523/1000, score: 92, cumulative reward: 48.080118048078134
Episode: 4524/1000, score: 68, cumulative reward: -178.06518219804474
Episode: 4525/1000, score: 159, cumulative reward: -301.79117632230515
Episode: 4526/1000, score: 63, cumulative reward: -163.41773294813052
Episode: 4527/1000, score: 84, cumulative reward: -146.1747963778048
Episode: 4528/1000, score: 55, cumulative reward: -162.47215098539232
Episode: 4529/1000, score: 94, cumulative reward: -90.99013937682793
Episode: 4530/1000, score: 55, cumulative reward: -69.04343301511351
Episode: 4531/1000, score: 101, cumulative reward: -52.36670959336875
Episode: 4532/1000, score: 55, cumulative reward: -138.31693593619926
Episode: 4533/1000, score: 85, cumulative reward: -135.00557103340455
Episode: 4534/1000, score: 79, cumulative reward: -372.9920623128657
Episode: 4535/1000, score: 73, cumulative reward: -164.03392405099666
Episode: 4536/1000, score: 54, cumulative reward: -190.89741312174132
Episode: 4537/1000, score: 110, cumulative reward: -85.1493104109261
Episode: 4538/1000, score: 106, cumulative reward: -137.48063413723978
Episode: 4539/1000, score: 55, cumulative reward: -66.34836378522397
Episode: 4540/1000, score: 83, cumulative reward: -95.33149176249272
Episode: 4541/1000, score: 84, cumulative reward: -59.23619656329977
Episode: 4542/1000, score: 132, cumulative reward: -433.34071012689463
Episode: 4543/1000, score: 63, cumulative reward: -171.91437279919424
Episode: 4544/1000, score: 138, cumulative reward: -128.50476325479343
Episode: 4545/1000, score: 215, cumulative reward: -576.7434942830464
Episode: 4546/1000, score: 122, cumulative reward: -244.6729737406359
Episode: 4547/1000, score: 118, cumulative reward: -415.81993005071763
Episode: 4548/1000, score: 130, cumulative reward: -852.2734718187336
Episode: 4549/1000, score: 66, cumulative reward: -62.38095186116441
Episode: 4550/1000, score: 101, cumulative reward: -425.5912385051431
Episode: 4551/1000, score: 131, cumulative reward: -30.650184913887813
Episode: 4552/1000, score: 112, cumulative reward: -323.25038693283454
Episode: 4553/1000, score: 249, cumulative reward: -171.17603428085482
Episode: 4554/1000, score: 90, cumulative reward: -106.32119496169817
Episode: 4555/1000, score: 65, cumulative reward: -12.715827240078198
Episode: 4556/1000, score: 65, cumulative reward: -57.34825179065548
Episode: 4557/1000, score: 247, cumulative reward: -51.191070735892296
Episode: 4558/1000, score: 95, cumulative reward: -95.96333048874516
Episode: 4559/1000, score: 74, cumulative reward: -125.54075363512062
Episode: 4560/1000, score: 83, cumulative reward: -182.82463134139175
Episode: 4561/1000, score: 133, cumulative reward: -93.69351508903692
Episode: 4562/1000, score: 77, cumulative reward: -128.52792468902376
Episode: 4563/1000, score: 132, cumulative reward: -128.81760206843262
Episode: 4564/1000, score: 209, cumulative reward: -250.2739577162844
Episode: 4565/1000, score: 76, cumulative reward: -101.73904193268976
Episode: 4566/1000, score: 81, cumulative reward: -142.36879549585154
Episode: 4567/1000, score: 101, cumulative reward: -94.78877508998208
Episode: 4568/1000, score: 95, cumulative reward: -119.91152847727366
Episode: 4569/1000, score: 319, cumulative reward: -126.51160263137267
Episode: 4570/1000, score: 294, cumulative reward: -681.9965446688517
Episode: 4571/1000, score: 401, cumulative reward: -368.8499675795851
Episode: 4572/1000, score: 144, cumulative reward: -329.4549011700335
Episode: 4573/1000, score: 87, cumulative reward: -284.56905007188357
Episode: 4574/1000, score: 333, cumulative reward: -216.09214592133776
Episode: 4575/1000, score: 449, cumulative reward: -319.98206843067317
Episode: 4576/1000, score: 138, cumulative reward: -295.9835471517391
Episode: 4577/1000, score: 239, cumulative reward: -311.84793072441266
Episode: 4578/1000, score: 299, cumulative reward: -240.31632654206942
Episode: 4579/1000, score: 266, cumulative reward: -323.01194186498174
Episode: 4580/1000, score: 298, cumulative reward: -388.22931254648586
Episode: 4581/1000, score: 382, cumulative reward: -296.7893958602714
Episode: 4582/1000, score: 172, cumulative reward: -210.94394685005466
Episode: 4583/1000, score: 219, cumulative reward: -279.86425454865457
Episode: 4584/1000, score: 269, cumulative reward: -193.9415479197661
Episode: 4585/1000, score: 151, cumulative reward: -233.4152044550332
Episode: 4586/1000, score: 592, cumulative reward: -228.00573162443277
Episode: 4587/1000, score: 286, cumulative reward: -468.32732160405334
Episode: 4588/1000, score: 215, cumulative reward: -430.8841785063621
Episode: 4589/1000, score: 71, cumulative reward: -446.5496775795514
Episode: 4590/1000, score: 233, cumulative reward: -355.5866152253823
Episode: 4591/1000, score: 137, cumulative reward: -374.4004096714315
Episode: 4592/1000, score: 110, cumulative reward: -140.4279734598615
Episode: 4593/1000, score: 126, cumulative reward: -205.28430227892545
Episode: 4594/1000, score: 203, cumulative reward: -251.53897615417947
Episode: 4595/1000, score: 136, cumulative reward: -495.4324187980655
Episode: 4596/1000, score: 103, cumulative reward: -412.3790079190176
Episode: 4597/1000, score: 115, cumulative reward: -359.28920946715226
Episode: 4598/1000, score: 183, cumulative reward: -89.13742182968807
Episode: 4599/1000, score: 108, cumulative reward: -252.0450387816922
Episode: 4600/1000, score: 101, cumulative reward: -192.25724540578375
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4601/1000, score: 94, cumulative reward: -151.45628895335628
Episode: 4602/1000, score: 154, cumulative reward: -120.93742546380346
Episode: 4603/1000, score: 77, cumulative reward: -148.36994570425335
Episode: 4604/1000, score: 99, cumulative reward: -249.48866097962218
Episode: 4605/1000, score: 102, cumulative reward: -191.88311457165963
Episode: 4606/1000, score: 110, cumulative reward: -160.2935196867958
Episode: 4607/1000, score: 91, cumulative reward: -269.6361312970163
Episode: 4608/1000, score: 99, cumulative reward: -221.77453856038267
Episode: 4609/1000, score: 94, cumulative reward: -113.48225889077924
Episode: 4610/1000, score: 144, cumulative reward: -150.01528603355035
Episode: 4611/1000, score: 94, cumulative reward: -134.49233333122768
Episode: 4612/1000, score: 90, cumulative reward: -242.40775349413383
Episode: 4613/1000, score: 109, cumulative reward: -216.7188255502221
Episode: 4614/1000, score: 99, cumulative reward: -206.1282030678825
Episode: 4615/1000, score: 71, cumulative reward: -71.82194697942037
Episode: 4616/1000, score: 107, cumulative reward: -282.6308603845548
Episode: 4617/1000, score: 59, cumulative reward: -32.32247803867749
Episode: 4618/1000, score: 128, cumulative reward: -132.51272643702038
Episode: 4619/1000, score: 164, cumulative reward: -94.84781202334479
Episode: 4620/1000, score: 260, cumulative reward: 210.78712662042793
Episode: 4621/1000, score: 112, cumulative reward: -53.337801113694965
Episode: 4622/1000, score: 134, cumulative reward: -282.83735083790475
Episode: 4623/1000, score: 64, cumulative reward: -463.3095474276086
Episode: 4624/1000, score: 77, cumulative reward: -176.48087439030863
Episode: 4625/1000, score: 138, cumulative reward: -136.60015081264237
Episode: 4626/1000, score: 103, cumulative reward: -2.0352328908383583
Episode: 4627/1000, score: 209, cumulative reward: -55.72941600176199
Episode: 4628/1000, score: 250, cumulative reward: -67.42509964187721
Episode: 4629/1000, score: 315, cumulative reward: -106.2062867895293
Episode: 4630/1000, score: 151, cumulative reward: -89.37918474425514
Episode: 4631/1000, score: 92, cumulative reward: -243.80777405625975
Episode: 4632/1000, score: 98, cumulative reward: -198.1816588637214
Episode: 4633/1000, score: 170, cumulative reward: 0.2164753149651375
Episode: 4634/1000, score: 514, cumulative reward: -248.23027000374725
Episode: 4635/1000, score: 380, cumulative reward: -132.61854808385732
Episode: 4636/1000, score: 149, cumulative reward: -184.9549395027724
Episode: 4637/1000, score: 220, cumulative reward: 32.53151815574026
Episode: 4638/1000, score: 216, cumulative reward: -273.13146897864783
Episode: 4639/1000, score: 119, cumulative reward: -372.91967043004416
Episode: 4640/1000, score: 144, cumulative reward: -4.237649888090189
Episode: 4641/1000, score: 356, cumulative reward: -185.9396380706584
Episode: 4642/1000, score: 63, cumulative reward: -157.89375571230846
Episode: 4643/1000, score: 189, cumulative reward: -156.99059365050044
Episode: 4644/1000, score: 127, cumulative reward: -360.24898458524854
Episode: 4645/1000, score: 92, cumulative reward: -398.2611632090427
Episode: 4646/1000, score: 82, cumulative reward: -126.95533458044346
Episode: 4647/1000, score: 149, cumulative reward: -307.69921614493353
Episode: 4648/1000, score: 246, cumulative reward: -86.06330005207478
Episode: 4649/1000, score: 178, cumulative reward: -168.5150238801374
Episode: 4650/1000, score: 241, cumulative reward: -130.0898670390564
Episode: 4651/1000, score: 297, cumulative reward: -330.3569474274158
Episode: 4652/1000, score: 108, cumulative reward: -198.80209845076996
Episode: 4653/1000, score: 148, cumulative reward: -263.4853052115234
Episode: 4654/1000, score: 238, cumulative reward: -89.92419086473387
Episode: 4655/1000, score: 109, cumulative reward: -281.7208710035019
Episode: 4656/1000, score: 166, cumulative reward: -223.2766233367498
Episode: 4657/1000, score: 141, cumulative reward: -256.2567711262297
Episode: 4658/1000, score: 68, cumulative reward: -87.34170714567053
Episode: 4659/1000, score: 151, cumulative reward: -90.80349896776009
Episode: 4660/1000, score: 57, cumulative reward: -55.37254869289525
Episode: 4661/1000, score: 92, cumulative reward: -112.09100446326408
Episode: 4662/1000, score: 184, cumulative reward: -135.64637695945157
Episode: 4663/1000, score: 170, cumulative reward: -96.82179303354066
Episode: 4664/1000, score: 69, cumulative reward: -120.6022678541714
Episode: 4665/1000, score: 97, cumulative reward: -133.29033209861672
Episode: 4666/1000, score: 104, cumulative reward: -167.5664208866348
Episode: 4667/1000, score: 342, cumulative reward: -85.97991275911247
Episode: 4668/1000, score: 331, cumulative reward: -293.3229641514798
Episode: 4669/1000, score: 176, cumulative reward: -158.87150747494692
Episode: 4670/1000, score: 105, cumulative reward: -314.48374311173046
Episode: 4671/1000, score: 209, cumulative reward: -289.97271092180415
Episode: 4672/1000, score: 120, cumulative reward: -204.83515967438416
Episode: 4673/1000, score: 992, cumulative reward: -493.0490012426716
Episode: 4674/1000, score: 254, cumulative reward: -380.8564928303236
Episode: 4675/1000, score: 116, cumulative reward: -196.43800350443172
Episode: 4676/1000, score: 650, cumulative reward: -738.0999743151931
Episode: 4677/1000, score: 116, cumulative reward: -272.3138498519168
Episode: 4678/1000, score: 102, cumulative reward: -476.72935471881414
Episode: 4679/1000, score: 54, cumulative reward: -106.22701841150482
Episode: 4680/1000, score: 250, cumulative reward: -288.41913389528565
Episode: 4681/1000, score: 218, cumulative reward: -281.8216451451044
Episode: 4682/1000, score: 121, cumulative reward: -203.06017747012174
Episode: 4683/1000, score: 143, cumulative reward: -235.55911587137157
Episode: 4684/1000, score: 186, cumulative reward: -358.29517908918
Episode: 4685/1000, score: 83, cumulative reward: -304.86442943105214
Episode: 4686/1000, score: 211, cumulative reward: -347.76416024635944
Episode: 4687/1000, score: 102, cumulative reward: -194.85164241946944
Episode: 4688/1000, score: 84, cumulative reward: -138.36868999396964
Episode: 4689/1000, score: 93, cumulative reward: -362.9306445362136
Episode: 4690/1000, score: 137, cumulative reward: -174.4464060710665
Episode: 4691/1000, score: 79, cumulative reward: -156.05783137361044
Episode: 4692/1000, score: 50, cumulative reward: -131.6764021387928
Episode: 4693/1000, score: 105, cumulative reward: -219.35961266531396
Episode: 4694/1000, score: 273, cumulative reward: -356.14528221363855
Episode: 4695/1000, score: 97, cumulative reward: -216.10402958675172
Episode: 4696/1000, score: 296, cumulative reward: -266.2274802461901
Episode: 4697/1000, score: 221, cumulative reward: -333.00485317991934
Episode: 4698/1000, score: 138, cumulative reward: -308.8390161174403
Episode: 4699/1000, score: 178, cumulative reward: -268.75119879320306
Episode: 4700/1000, score: 126, cumulative reward: -209.0651640082128
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4701/1000, score: 248, cumulative reward: -174.8489663466223
Episode: 4702/1000, score: 425, cumulative reward: -224.68490278631253
Episode: 4703/1000, score: 418, cumulative reward: -136.54717383481957
Episode: 4704/1000, score: 200, cumulative reward: -172.797347257569
Episode: 4705/1000, score: 328, cumulative reward: -197.94560653784234
Episode: 4706/1000, score: 154, cumulative reward: -162.39278094987847
Episode: 4707/1000, score: 120, cumulative reward: -38.14861143634665
Episode: 4708/1000, score: 208, cumulative reward: -247.22493712576164
Episode: 4709/1000, score: 141, cumulative reward: -130.4057114269411
Episode: 4710/1000, score: 189, cumulative reward: -188.50700258583333
Episode: 4711/1000, score: 199, cumulative reward: -184.90720764234234
Episode: 4712/1000, score: 240, cumulative reward: -294.3484374673644
Episode: 4713/1000, score: 129, cumulative reward: -287.91813051639133
Episode: 4714/1000, score: 415, cumulative reward: -217.40432706484316
Episode: 4715/1000, score: 241, cumulative reward: -153.9614628802692
Episode: 4716/1000, score: 482, cumulative reward: -261.52835567749037
Episode: 4717/1000, score: 253, cumulative reward: -226.5534796605261
Episode: 4718/1000, score: 162, cumulative reward: -167.63102236556222
Episode: 4719/1000, score: 334, cumulative reward: -204.86542446585054
Episode: 4720/1000, score: 509, cumulative reward: -459.6890514910777
Episode: 4721/1000, score: 90, cumulative reward: -146.0611958537859
Episode: 4722/1000, score: 62, cumulative reward: -171.48762547906807
Episode: 4723/1000, score: 92, cumulative reward: -184.41165037341227
Episode: 4724/1000, score: 167, cumulative reward: -170.48888621057807
Episode: 4725/1000, score: 134, cumulative reward: -188.75536591744697
Episode: 4726/1000, score: 145, cumulative reward: -525.255551671283
Episode: 4727/1000, score: 187, cumulative reward: -76.609648892665
Episode: 4728/1000, score: 197, cumulative reward: -212.9807822817242
Episode: 4729/1000, score: 103, cumulative reward: -405.4037503023365
Episode: 4730/1000, score: 167, cumulative reward: -292.2090137193303
Episode: 4731/1000, score: 113, cumulative reward: -423.8973242586783
Episode: 4732/1000, score: 125, cumulative reward: -265.6532863790036
Episode: 4733/1000, score: 145, cumulative reward: -164.71198015738074
Episode: 4734/1000, score: 127, cumulative reward: -219.40016614723442
Episode: 4735/1000, score: 109, cumulative reward: -101.24478377711239
Episode: 4736/1000, score: 135, cumulative reward: -140.59372892246776
Episode: 4737/1000, score: 97, cumulative reward: -246.38762887415504
Episode: 4738/1000, score: 53, cumulative reward: -326.89412388370954
Episode: 4739/1000, score: 68, cumulative reward: -233.03795041051012
Episode: 4740/1000, score: 137, cumulative reward: -59.64920055602731
Episode: 4741/1000, score: 117, cumulative reward: -237.1781809631094
Episode: 4742/1000, score: 95, cumulative reward: -59.12039466952233
Episode: 4743/1000, score: 120, cumulative reward: -401.78585984284433
Episode: 4744/1000, score: 52, cumulative reward: -381.7775346017184
Episode: 4745/1000, score: 85, cumulative reward: -106.19092216376987
Episode: 4746/1000, score: 56, cumulative reward: -228.61363478048014
Episode: 4747/1000, score: 86, cumulative reward: -371.8883571718767
Episode: 4748/1000, score: 76, cumulative reward: -272.95537071728876
Episode: 4749/1000, score: 109, cumulative reward: -228.28004948462024
Episode: 4750/1000, score: 183, cumulative reward: 16.612458237035597
Episode: 4751/1000, score: 174, cumulative reward: -240.12411175407922
Episode: 4752/1000, score: 79, cumulative reward: -327.6486299805061
Episode: 4753/1000, score: 62, cumulative reward: -182.23714442091403
Episode: 4754/1000, score: 149, cumulative reward: -5.900507906566574
Episode: 4755/1000, score: 108, cumulative reward: -387.9250607555628
Episode: 4756/1000, score: 77, cumulative reward: -160.65370582079268
Episode: 4757/1000, score: 68, cumulative reward: -81.70491622407965
Episode: 4758/1000, score: 93, cumulative reward: -433.9835106739141
Episode: 4759/1000, score: 61, cumulative reward: -177.94266351188057
Episode: 4760/1000, score: 73, cumulative reward: -242.3323184323225
Episode: 4761/1000, score: 135, cumulative reward: -265.59739889977175
Episode: 4762/1000, score: 158, cumulative reward: -359.6449930869565
Episode: 4763/1000, score: 116, cumulative reward: -51.87421208983965
Episode: 4764/1000, score: 52, cumulative reward: -229.8519591773749
Episode: 4765/1000, score: 97, cumulative reward: -497.01259656250306
Episode: 4766/1000, score: 266, cumulative reward: -391.2553427590178
Episode: 4767/1000, score: 169, cumulative reward: -589.3173445648058
Episode: 4768/1000, score: 104, cumulative reward: -211.5834198836896
Episode: 4769/1000, score: 104, cumulative reward: -99.46872297286623
Episode: 4770/1000, score: 59, cumulative reward: -153.70398447842982
Episode: 4771/1000, score: 126, cumulative reward: -150.86822473165236
Episode: 4772/1000, score: 113, cumulative reward: -162.58002624488262
Episode: 4773/1000, score: 89, cumulative reward: -126.60631495712427
Episode: 4774/1000, score: 115, cumulative reward: -48.36136139987419
Episode: 4775/1000, score: 161, cumulative reward: -310.0045816387245
Episode: 4776/1000, score: 91, cumulative reward: -50.027874079531486
Episode: 4777/1000, score: 166, cumulative reward: -122.86121445187264
Episode: 4778/1000, score: 79, cumulative reward: -92.13270195747205
Episode: 4779/1000, score: 151, cumulative reward: -37.77843456091217
Episode: 4780/1000, score: 118, cumulative reward: -502.6086363128123
Episode: 4781/1000, score: 114, cumulative reward: -314.1192546240329
Episode: 4782/1000, score: 194, cumulative reward: -37.3239490271573
Episode: 4783/1000, score: 92, cumulative reward: -259.452384336875
Episode: 4784/1000, score: 139, cumulative reward: -339.2348881280197
Episode: 4785/1000, score: 192, cumulative reward: -303.69226069340226
Episode: 4786/1000, score: 456, cumulative reward: -159.77500614405682
Episode: 4787/1000, score: 149, cumulative reward: -297.0603490657706
Episode: 4788/1000, score: 66, cumulative reward: -348.7481110230637
Episode: 4789/1000, score: 150, cumulative reward: -173.77857589665632
Episode: 4790/1000, score: 186, cumulative reward: -374.27529002900496
Episode: 4791/1000, score: 76, cumulative reward: -147.92518175864404
Episode: 4792/1000, score: 67, cumulative reward: -133.66362293770098
Episode: 4793/1000, score: 66, cumulative reward: -496.34676053676395
Episode: 4794/1000, score: 56, cumulative reward: -496.3341551023477
Episode: 4795/1000, score: 74, cumulative reward: -698.1862935244621
Episode: 4796/1000, score: 56, cumulative reward: -464.8642562973878
Episode: 4797/1000, score: 72, cumulative reward: -461.7419122841999
Episode: 4798/1000, score: 82, cumulative reward: -936.016890533411
Episode: 4799/1000, score: 53, cumulative reward: -106.17603372712658
Episode: 4800/1000, score: 82, cumulative reward: -137.14770989828494
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4801/1000, score: 86, cumulative reward: -489.627192785942
Episode: 4802/1000, score: 62, cumulative reward: -582.5928349212827
Episode: 4803/1000, score: 61, cumulative reward: -454.64189255852904
Episode: 4804/1000, score: 68, cumulative reward: -180.8986237583914
Episode: 4805/1000, score: 69, cumulative reward: -196.5246262910576
Episode: 4806/1000, score: 76, cumulative reward: -126.11426220263147
Episode: 4807/1000, score: 49, cumulative reward: -347.36123609178986
Episode: 4808/1000, score: 64, cumulative reward: -669.3856697116557
Episode: 4809/1000, score: 73, cumulative reward: -333.942495792858
Episode: 4810/1000, score: 57, cumulative reward: -151.8153831430505
Episode: 4811/1000, score: 78, cumulative reward: -463.4160214654308
Episode: 4812/1000, score: 83, cumulative reward: -83.55499363248236
Episode: 4813/1000, score: 88, cumulative reward: -546.2906699222647
Episode: 4814/1000, score: 91, cumulative reward: -129.76592708771054
Episode: 4815/1000, score: 88, cumulative reward: -54.536003855065914
Episode: 4816/1000, score: 115, cumulative reward: -649.7006578269029
Episode: 4817/1000, score: 93, cumulative reward: -664.2029499131087
Episode: 4818/1000, score: 103, cumulative reward: -20.07310939271241
Episode: 4819/1000, score: 95, cumulative reward: -563.2397316606604
Episode: 4820/1000, score: 117, cumulative reward: -311.0937597488154
Episode: 4821/1000, score: 83, cumulative reward: -46.532599992294806
Episode: 4822/1000, score: 83, cumulative reward: -339.74002667982074
Episode: 4823/1000, score: 85, cumulative reward: -743.8891452097885
Episode: 4824/1000, score: 85, cumulative reward: -188.63856709142848
Episode: 4825/1000, score: 107, cumulative reward: -132.71643886121842
Episode: 4826/1000, score: 176, cumulative reward: -6.737336202594776
Episode: 4827/1000, score: 71, cumulative reward: -157.87731068493468
Episode: 4828/1000, score: 58, cumulative reward: -417.5587646020593
Episode: 4829/1000, score: 130, cumulative reward: -535.2259195697617
Episode: 4830/1000, score: 82, cumulative reward: -147.28382588115744
Episode: 4831/1000, score: 55, cumulative reward: -140.37663461701817
Episode: 4832/1000, score: 110, cumulative reward: -475.420180216418
Episode: 4833/1000, score: 156, cumulative reward: -737.4385404346755
Episode: 4834/1000, score: 83, cumulative reward: -299.48104702814027
Episode: 4835/1000, score: 83, cumulative reward: -545.8366257205292
Episode: 4836/1000, score: 133, cumulative reward: -561.4315157299778
Episode: 4837/1000, score: 168, cumulative reward: -487.6259416364215
Episode: 4838/1000, score: 74, cumulative reward: -211.39452442766157
Episode: 4839/1000, score: 84, cumulative reward: -160.46856189217795
Episode: 4840/1000, score: 84, cumulative reward: -162.09683393673276
Episode: 4841/1000, score: 190, cumulative reward: -426.82065100217824
Episode: 4842/1000, score: 131, cumulative reward: -458.66502438422486
Episode: 4843/1000, score: 80, cumulative reward: -188.66014802700167
Episode: 4844/1000, score: 154, cumulative reward: -427.23190042933965
Episode: 4845/1000, score: 94, cumulative reward: -230.60272631121111
Episode: 4846/1000, score: 163, cumulative reward: -442.39020420149745
Episode: 4847/1000, score: 141, cumulative reward: -400.88331335726235
Episode: 4848/1000, score: 82, cumulative reward: -100.99992417779347
Episode: 4849/1000, score: 89, cumulative reward: -202.0800637291262
Episode: 4850/1000, score: 71, cumulative reward: -138.32261683764227
Episode: 4851/1000, score: 79, cumulative reward: -320.14479950033615
Episode: 4852/1000, score: 66, cumulative reward: -103.30917752969114
Episode: 4853/1000, score: 62, cumulative reward: -132.41139857320368
Episode: 4854/1000, score: 169, cumulative reward: 24.14737591571327
Episode: 4855/1000, score: 73, cumulative reward: -390.2009449682964
Episode: 4856/1000, score: 114, cumulative reward: -514.5390504653724
Episode: 4857/1000, score: 92, cumulative reward: -394.5206907090041
Episode: 4858/1000, score: 82, cumulative reward: -715.5966405082077
Episode: 4859/1000, score: 82, cumulative reward: -394.8142810454863
Episode: 4860/1000, score: 122, cumulative reward: -581.8831968253851
Episode: 4861/1000, score: 77, cumulative reward: -318.80940664280297
Episode: 4862/1000, score: 122, cumulative reward: -237.54497640208078
Episode: 4863/1000, score: 156, cumulative reward: -580.3554686993236
Episode: 4864/1000, score: 75, cumulative reward: -381.94381070928006
Episode: 4865/1000, score: 107, cumulative reward: -208.44354401361863
Episode: 4866/1000, score: 209, cumulative reward: -234.03927883649493
Episode: 4867/1000, score: 121, cumulative reward: -550.9392806250053
Episode: 4868/1000, score: 67, cumulative reward: -472.47040647558185
Episode: 4869/1000, score: 206, cumulative reward: -407.97403500314806
Episode: 4870/1000, score: 85, cumulative reward: -75.25286470588539
Episode: 4871/1000, score: 225, cumulative reward: -126.64551172412837
Episode: 4872/1000, score: 92, cumulative reward: -506.3573898768267
Episode: 4873/1000, score: 90, cumulative reward: -147.23420737111172
Episode: 4874/1000, score: 91, cumulative reward: -95.26560329138741
Episode: 4875/1000, score: 84, cumulative reward: -396.3029735374554
Episode: 4876/1000, score: 105, cumulative reward: -355.87745420900944
Episode: 4877/1000, score: 65, cumulative reward: -288.66129199730597
Episode: 4878/1000, score: 62, cumulative reward: -305.21573605141936
Episode: 4879/1000, score: 75, cumulative reward: -542.3648057257108
Episode: 4880/1000, score: 65, cumulative reward: -268.8672567758159
Episode: 4881/1000, score: 93, cumulative reward: -383.7484879403136
Episode: 4882/1000, score: 88, cumulative reward: -223.35667957905991
Episode: 4883/1000, score: 104, cumulative reward: -634.7366879703624
Episode: 4884/1000, score: 108, cumulative reward: -252.94000364276712
Episode: 4885/1000, score: 136, cumulative reward: -15.212369413628267
Episode: 4886/1000, score: 72, cumulative reward: -293.03909648006714
Episode: 4887/1000, score: 87, cumulative reward: -265.4534599459636
Episode: 4888/1000, score: 92, cumulative reward: -377.78145942674826
Episode: 4889/1000, score: 57, cumulative reward: -218.78315636568755
Episode: 4890/1000, score: 78, cumulative reward: -272.339950280352
Episode: 4891/1000, score: 94, cumulative reward: -455.55484041594514
Episode: 4892/1000, score: 89, cumulative reward: -366.41116177331395
Episode: 4893/1000, score: 94, cumulative reward: -428.4707748500423
Episode: 4894/1000, score: 75, cumulative reward: -211.86040360876532
Episode: 4895/1000, score: 102, cumulative reward: -28.23298771582614
Episode: 4896/1000, score: 75, cumulative reward: -507.6858684370628
Episode: 4897/1000, score: 154, cumulative reward: -133.75008792471723
Episode: 4898/1000, score: 96, cumulative reward: -244.21410050213862
Episode: 4899/1000, score: 72, cumulative reward: -435.8442287784185
Episode: 4900/1000, score: 100, cumulative reward: -152.90298551427566
模型状态字典已保存至 c:\Users\isrya\#MyFiles\#MyCode\GithubUoL\COMP532\AS02\models\model_0.pth
Episode: 4901/1000, score: 119, cumulative reward: -189.62098395197415
Episode: 4902/1000, score: 89, cumulative reward: -326.3962413515882
Episode: 4903/1000, score: 124, cumulative reward: -36.638030112439125
Episode: 4904/1000, score: 117, cumulative reward: -381.1404765820091
Episode: 4905/1000, score: 128, cumulative reward: -261.7789784658159
Episode: 4906/1000, score: 85, cumulative reward: -72.12590543324009
Episode: 4907/1000, score: 67, cumulative reward: -95.15192264262802
Episode: 4908/1000, score: 73, cumulative reward: -46.36332229056984
Episode: 4909/1000, score: 95, cumulative reward: -69.60292598310943
Episode: 4910/1000, score: 73, cumulative reward: -65.36958964551343
Episode: 4911/1000, score: 62, cumulative reward: -223.14117005037147
Episode: 4912/1000, score: 64, cumulative reward: -177.25981176792598
Episode: 4913/1000, score: 55, cumulative reward: -120.03730047688035
Episode: 4914/1000, score: 54, cumulative reward: -377.71627940458933
Episode: 4915/1000, score: 103, cumulative reward: -235.34731304542748
Episode: 4916/1000, score: 66, cumulative reward: -162.9900203189467
Episode: 4917/1000, score: 73, cumulative reward: -301.1368596602068
Episode: 4918/1000, score: 106, cumulative reward: -270.3940455653556
Episode: 4919/1000, score: 107, cumulative reward: -257.46151148558874
Episode: 4920/1000, score: 98, cumulative reward: -186.86183887607703
Episode: 4921/1000, score: 103, cumulative reward: -7.735569561788623
Episode: 4922/1000, score: 70, cumulative reward: -311.776072906973
Episode: 4923/1000, score: 83, cumulative reward: -3.8372281047361554
Episode: 4924/1000, score: 87, cumulative reward: -574.4646093409976
Episode: 4925/1000, score: 121, cumulative reward: -101.55631130333347
Episode: 4926/1000, score: 93, cumulative reward: -174.43132150864383
Episode: 4927/1000, score: 90, cumulative reward: -178.2547779499635
Episode: 4928/1000, score: 76, cumulative reward: -347.15534059044666
Episode: 4929/1000, score: 81, cumulative reward: -616.1193773234423
Episode: 4930/1000, score: 94, cumulative reward: -186.97230411308004
Episode: 4931/1000, score: 81, cumulative reward: -359.11543230909297
Episode: 4932/1000, score: 97, cumulative reward: -253.42704193915762
Episode: 4933/1000, score: 120, cumulative reward: -247.75139801553507
Episode: 4934/1000, score: 98, cumulative reward: -182.97164449092986
Episode: 4935/1000, score: 79, cumulative reward: -131.81025679402643
Episode: 4936/1000, score: 87, cumulative reward: -116.29231780103231
Episode: 4937/1000, score: 87, cumulative reward: -364.51611832602566
Episode: 4938/1000, score: 105, cumulative reward: -153.02584707919
Episode: 4939/1000, score: 103, cumulative reward: -293.3350814505547
Episode: 4940/1000, score: 115, cumulative reward: -211.35137515612888
Episode: 4941/1000, score: 77, cumulative reward: -287.50157750360984
Episode: 4942/1000, score: 87, cumulative reward: -248.9466329693079
Episode: 4943/1000, score: 116, cumulative reward: -140.6445947350037
Episode: 4944/1000, score: 82, cumulative reward: -383.5939316748585
Episode: 4945/1000, score: 95, cumulative reward: -67.33333185527042
Episode: 4946/1000, score: 116, cumulative reward: -6.463622005342771
Episode: 4947/1000, score: 106, cumulative reward: -117.14283192768872
Episode: 4948/1000, score: 85, cumulative reward: -167.34438849505014
Episode: 4949/1000, score: 87, cumulative reward: -313.1129096672898
Episode: 4950/1000, score: 98, cumulative reward: -258.4427927805451
Episode: 4951/1000, score: 104, cumulative reward: -75.14676367866815
Episode: 4952/1000, score: 76, cumulative reward: -117.92279218162602
Episode: 4953/1000, score: 81, cumulative reward: -248.1946780332365
Episode: 4954/1000, score: 101, cumulative reward: -118.90518051332153
Episode: 4955/1000, score: 88, cumulative reward: -278.6397462870718
Episode: 4956/1000, score: 80, cumulative reward: -168.23726547051297
Episode: 4957/1000, score: 131, cumulative reward: -269.45693826060784
Episode: 4958/1000, score: 78, cumulative reward: -102.21621787954375
Episode: 4959/1000, score: 60, cumulative reward: -174.72513130415354
Episode: 4960/1000, score: 85, cumulative reward: -158.90157515930773
Episode: 4961/1000, score: 66, cumulative reward: -231.43969792439415
Episode: 4962/1000, score: 61, cumulative reward: -127.20177571337186
Episode: 4963/1000, score: 69, cumulative reward: -156.10480883365574
Episode: 4964/1000, score: 62, cumulative reward: -135.51407250714243
Episode: 4965/1000, score: 63, cumulative reward: -198.37801105218784
Episode: 4966/1000, score: 79, cumulative reward: -183.23038462010788
Episode: 4967/1000, score: 89, cumulative reward: -329.9573055379198
Episode: 4968/1000, score: 74, cumulative reward: -304.3011961600491
Episode: 4969/1000, score: 69, cumulative reward: -161.96019928245758
Episode: 4970/1000, score: 87, cumulative reward: -259.3521430289924
Episode: 4971/1000, score: 64, cumulative reward: -250.85700031654002
Episode: 4972/1000, score: 84, cumulative reward: -162.00268275018414
Episode: 4973/1000, score: 90, cumulative reward: -463.688346994615
Episode: 4974/1000, score: 63, cumulative reward: -165.83439993935227
Episode: 4975/1000, score: 83, cumulative reward: -108.563647649204
Episode: 4976/1000, score: 58, cumulative reward: -124.59752170275615
Episode: 4977/1000, score: 72, cumulative reward: -138.0766544328472
Episode: 4978/1000, score: 77, cumulative reward: -279.4903701141835
Episode: 4979/1000, score: 77, cumulative reward: -156.7659982770745
Episode: 4980/1000, score: 52, cumulative reward: -141.80318177853133
Episode: 4981/1000, score: 100, cumulative reward: -68.6019436565166
Episode: 4982/1000, score: 65, cumulative reward: -155.06195101052256
Episode: 4983/1000, score: 56, cumulative reward: -92.73149585103687
Episode: 4984/1000, score: 66, cumulative reward: -242.11666032562766
Episode: 4985/1000, score: 64, cumulative reward: -200.10826743234566
Episode: 4986/1000, score: 92, cumulative reward: -294.2684956123633
Episode: 4987/1000, score: 98, cumulative reward: -95.38333354058322
Episode: 4988/1000, score: 70, cumulative reward: -169.734511863438
Episode: 4989/1000, score: 82, cumulative reward: -145.113199686523
Episode: 4990/1000, score: 91, cumulative reward: -102.94260739447282
Episode: 4991/1000, score: 75, cumulative reward: -122.06660506309169
Episode: 4992/1000, score: 84, cumulative reward: -132.42171687651407
Episode: 4993/1000, score: 55, cumulative reward: -121.42256204238869
Episode: 4994/1000, score: 51, cumulative reward: -123.67243193127429
Episode: 4995/1000, score: 87, cumulative reward: -126.1938932019001
Episode: 4996/1000, score: 61, cumulative reward: -118.94123117252063
Episode: 4997/1000, score: 76, cumulative reward: -182.92288906959394
Episode: 4998/1000, score: 66, cumulative reward: -151.5703633931717
Episode: 4999/1000, score: 71, cumulative reward: -167.22645460330332
"""



# 解析数据
episodes = []
rewards = []

# 按行分割数据，然后处理每行
for line in data.strip().split("\n"):
    if "模型状态字典已保存至" in line:
        continue  # 跳过包含模型保存信息的行
    parts = line.split(", ")
    episode_info = parts[0]
    reward_info = parts[2]

    # 提取episode编号
    episode_number = int(episode_info.split()[1].split("/")[0])

    # 提取cumulative reward
    cumulative_reward = float(reward_info.split(": ")[1])

    episodes.append(episode_number)
    rewards.append(cumulative_reward)

# 绘图
plt.figure(figsize=(10, 5))
plt.plot(episodes, rewards, marker='o', linestyle='-', color='b')
plt.title("Cumulative Reward per Episode")
plt.xlabel("Episode")
plt.ylabel("Cumulative Reward")
plt.grid(True)
plt.show()


SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 5887-5888: truncated \UXXXXXXXX escape (1521401125.py, line 4)