In [1]:
from MuJoCo_Gym.mujoco_rl import MuJoCoRL
from MuJoCo_Gym.wrappers import GymnasiumWrapper, GymWrapper
from gymnasium.wrappers.frame_stack import FrameStack
from gymnasium.experimental.wrappers import NormalizeObservationV0
from dynamics import *
import argparse
import os
import random
import time
from distutils.util import strtobool
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter

from wrappers.record_episode_statistics import RecordEpisodeStatistics
from wrappers.frame_stack import FrameStack
from wrappers.normalizeObservation import NormalizeObservation
from wrappers.normalizeRewards import NormalizeReward

from progressbar import progressbar

# Dynamics

In [2]:
from sklearn.metrics import mean_squared_error
from autoencoder import load_autoencoder_model, get_a_single_image_embedding
import math
import mujoco

## Vision

In [3]:
class Image:
    def __init__(self, environment):
        self.environment = environment
        self.observation_space = {"low": [0 for _ in range(50)], "high": [1 for _ in range(50)]}
        self.action_space = {"low": [], "high": []}
        self.autoencoder = load_autoencoder_model("models/autoencoder30.keras")
        self.index = 0

    def dynamic(self, agent, actions):
        self.index = self.index + 1
        image = self.environment.get_camera_data(agent + "_camera")
        result = get_a_single_image_embedding(self.autoencoder, image)
        # cv2.imwrite(f"/Users/cowolff/Documents/GitHub/s.language_experiments/images/{agent}_{self.index}.png", image)
        return 0, result, False, {}

## Communication

In [4]:
class Communication:
    def __init__(self, environment):
        self.environment = environment
        self.observation_space = {"low": [0, 0, 0, 0], "high": [1, 1, 1, 1]}
        self.action_space = {"low": [0, 0, 0, 0], "high": [1, 1, 1, 1]}

    def dynamic(self, agent, actions):
        if "utterance" not in self.environment.data_store[agent].keys():
            self.environment.data_store[agent]["utterance"] = None
        if agent == "receiver":
            utterance = [0, 0, 0, 0]
            if "target_color" in self.environment.data_store.keys():
                utterance[np.argmax(self.environment.data_store["target_color"])] = 1
            observation = utterance
        elif agent == "sender":
            utterance = [0, 0, 0, 0]
            utterance[np.argmax(actions)] = 1
            self.environment.data_store[agent]["utterance"] = actions
            self.environment.data_store[agent]["utterance_max"] = utterance
            observation = [0, 0, 0, 0]
        else:
            print("Dafaq is going on here?")
        return 0, observation, False, {}

## Reward Dynamic Function
The reward function for the receiver gives a positive reward for the agent to move closer to the current target object.<br/>
The sender in turn gets a positive reward if it chooses the correct word encoding for the color of the current target object.

In [5]:
class Reward:
    def __init__(self, environment):
        self.environment = environment
        self.observation_space = {"low": [], "high": []}
        self.action_space = {"low": [], "high": []}
        self.choices = ["choice_1", "choice_2"]

    def dynamic(self, agent, actions):
        if not "target" in self.environment.data_store.keys():
            color = self.environment.get_data("reference_geom")["color"]
            for choice in self.choices:
                if (color == self.environment.get_data(choice + "_geom")["color"]).all():
                    self.environment.data_store["target"] = choice
                    self.environment.data_store["target_color"] = self.environment.get_data(choice + "_geom")["color"]
                    self.environment.data_store["last_distance"] = copy.deepcopy(self.environment.distance("receiver_geom", choice + "_geom"))
        if agent == "receiver":
            target = self.environment.data_store["target"]
            new_distance = self.environment.distance("receiver_geom", target + "_geom")
            reward = (self.environment.data_store["last_distance"] - new_distance) * 10
            self.environment.data_store["last_distance"] = copy.deepcopy(new_distance)
        elif agent == "sender":
            reference = [0, 0, 0, 0]
            color = self.environment.data_store["target_color"]
            reference[np.argmax(color)] = 1
            reward = 0
            if "utterance" in self.environment.data_store[agent].keys():
                reward = -1 * mean_squared_error(reference, self.environment.data_store[agent]["utterance"])
        return reward, [], False, {}

## Accuracy Measurement Dynamic

In [6]:
class Accuracy:
    def __init__(self, environment):
        self.environment = environment
        self.observation_space = {"low": [], "high": []}
        self.action_space = {"low": [], "high": []}
        self.accuracies = []
        self.variances = []
        self.sendAccuracies = []
        self.sendVariances = []
        self.currentSend = []
        self.report_accuracy = {"sender": 0, "receiver": 0}

    def dynamic(self, agent, actions):
        choices = ["choice_1", "choice_2"]
        variance = {"choice_1":1, "choice_2":-1}
        if "target" in self.environment.data_store.keys():
            if "sendVariances" not in self.environment.data_store.keys():
                self.environment.data_store["sendVariances"] = True
                self.currentSend = [0, 0, 0, 0]
            target = self.environment.data_store["target"]
            # if any(self.environment.collision(ankle, target + "_geom") for ankle in ["left_leg_geom_2", "left_ankle_geom_2", "right_leg_geom_2", "right_ankle_geom_2", "back_leg_geom_2", "third_ankle_geom_2", "rightback_leg_geom_2", "fourth_ankle_geom_2"]):
            if self.environment.collision("receiver_geom", target + "_geom"):
                self.accuracies.append(1)
                self.variances.append(variance[target])

                if len(self.variances) > 50:
                    report_variance = 1 - abs(sum(self.variances[-50:]) / 50)
                    self.report_accuracy["receiver"] = sum(self.accuracies[-50:]) / 50
            # elif any(self.environment.collision(ankle, [choice for choice in choices if choice != target][0] + "_geom") for ankle in ["left_leg_geom_2", "left_ankle_geom_2", "right_leg_geom_2", "right_ankle_geom_2", "back_leg_geom_2", "third_ankle_geom_2", "rightback_leg_geom_2", "fourth_ankle_geom_2"]):
            elif self.environment.collision("receiver_geom", [choice for choice in choices if choice != target][0] + "_geom"):
                self.accuracies.append(0)
                self.variances.append(variance[[choice for choice in choices if choice != target][0]])

                if len(self.variances) > 50:
                    report_variance = 1 - abs(sum(self.variances[-50:]) / 50)
                    self.report_accuracy[0] = sum(self.accuracies[-50:]) / 50
            if "utterance_max" in self.environment.data_store[agent].keys():
                reference = [0, 0, 0, 0]
                color = self.environment.data_store["target_color"]
                reference[np.argmax(color)] = 1
                self.currentSend = np.add(self.currentSend, self.environment.data_store[agent]["utterance_max"])

                if self.environment.data_store[agent]["utterance_max"]  == reference:
                    self.sendAccuracies.append(1)
                else:
                    self.sendAccuracies.append(0)
        return 0, [], False, {}

# Reward Functions

In [7]:
def target_reward(mujoco_gym, agent):
    if agent == "receiver":
        choices = ["choice_1", "choice_2"]
        if not "target" in mujoco_gym.data_store.keys():
                color = mujoco_gym.get_data("reference_geom")["color"]
                for choice in choices:
                    if (color == mujoco_gym.get_data(choice + "_geom")["color"]).all():
                        mujoco_gym.data_store["target"] = choice
                        mujoco_gym.data_store["target_color"] = mujoco_gym.get_data(choice + "_geom")["color"]
        
        target = mujoco_gym.data_store["target"]
        # for ankle in ["left_leg_geom_2", "left_ankle_geom_2", "right_leg_geom_2", "right_ankle_geom_2", "back_leg_geom_2", "third_ankle_geom_2", "rightback_leg_geom_2", "fourth_ankle_geom_2"]:
        for ankle in ["receiver_geom"]:
            if mujoco_gym.collision(ankle, target + "_geom"):
                return 1
            elif mujoco_gym.collision(ankle, [choice for choice in choices if choice != target][0] + "_geom"):
                return -1
    return 0

In [8]:
def collision_reward(mujoco_gym, agent):
    for border in ["border1_geom", "border2_geom", "border3_geom", "border4_geom", "border5_geom"]:
        # for ankle in ["left_leg_geom_2", "left_ankle_geom_2", "right_leg_geom_2", "right_ankle_geom_2", "back_leg_geom_2", "third_ankle_geom_2", "rightback_leg_geom_2", "fourth_ankle_geom_2"]:
        for ankle in [agent + "_geom"]:
            if mujoco_gym.collision(border, ankle):
                return -0.1
    return 0

# Environment Setup

In [9]:
def make_env(config_dict):
    def thunk():
        window = 5
        env = MuJoCoRL(config_dict=config_dict)
        # env = GymWrapper(env, "receiver")
        # env = FrameStack(env, 4)
        env = NormalizeObservation(env)
        env = NormalizeReward(env)
        # env = RecordEpisodeStatistics(env)
        return env

    return thunk

In [11]:
xml_files = ["levels/" + file for file in os.listdir("levels/")]
agents = ["sender", "receiver"]

config_dict = {"xmlPath":xml_files, 
                   "agents":agents, 
                   "rewardFunctions":[collision_reward, target_reward], 
                   "doneFunctions":[target_done, border_done], 
                   "skipFrames":5,
                   "environmentDynamics":[Image, Reward, Communication, Accuracy],
                   "freeJoint":True,
                   "renderMode":False,
                   "maxSteps":1024,
                   "agentCameras":True}

env = make_env(config_dict)()

env: MUJOCO_GL=egl


FatalError: gladLoadGL error

# RL Model

In [11]:
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer

class Agent(nn.Module):
    def __init__(self, envs):
        super(Agent, self).__init__()
        self.critic = nn.Sequential(
            nn.Flatten(),
            layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 128)),
            nn.Tanh(),
            layer_init(nn.Linear(128, 128)),
            nn.Tanh(),
            layer_init(nn.Linear(128, 1), std=1.0),
        )
        self.actor_mean = nn.Sequential(
            nn.Flatten(),
            layer_init(nn.Linear(np.array(envs.observation_space.shape).prod(), 128)),
            nn.Tanh(),
            layer_init(nn.Linear(128, 128)),
            nn.Tanh(),
            layer_init(nn.Linear(128, np.prod(envs.action_space.shape)), std=0.01),
        )
        self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.action_space.shape)))

    def get_value(self, x):
        return self.critic(x)

    def get_action_and_value(self, x, action=None):
        action_mean = self.actor_mean(x)
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(x)

# Buffer

In [12]:
class Buffer():
    def __init__(self, num_steps, envs, num_envs, device):
        self.obs = torch.zeros((num_steps, num_envs) + envs.observation_space.shape).to(device)
        self.actions = torch.zeros((num_steps, num_envs) + envs.action_space.shape).to(device)
        self.logprobs = torch.zeros((num_steps, num_envs)).to(device)
        self.rewards = torch.zeros((num_steps, num_envs)).to(device)
        self.dones = torch.zeros((num_steps, num_envs)).to(device)
        self.values = torch.zeros((num_steps, num_envs)).to(device)

# Update Function

In [13]:
def update_agent(agent, buffer, optimizer, next_obs, next_done, env, batch_size, update_epochs, minibatch_size, clip_coef, vf_coef, ent_coef, max_grad_norm, target_kl, clip_vloss, norm_adv, gae_lambda, gae, gamma, device, num_steps):

    with torch.no_grad():
        next_value = agent.get_value(next_obs).reshape(1, -1)
        if gae:
            advantages = torch.zeros_like(buffer.rewards).to(device)
            lastgaelam = 0
            for t in reversed(range(num_steps)):
                if t == num_steps - 1:
                    nextnonterminal = 1.0 - next_done
                    nextvalues = next_value
                else:
                    nextnonterminal = 1.0 - buffer.dones[t + 1]
                    nextvalues = buffer.values[t + 1]
                delta = buffer.rewards[t] + gamma * nextvalues * nextnonterminal - buffer.values[t]
                advantages[t] = lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
            returns = advantages + buffer.values
        else:
            returns = torch.zeros_like(buffer.rewards).to(device)
            for t in reversed(range(num_steps)):
                if t == num_steps - 1:
                    nextnonterminal = 1.0 - next_done
                    next_return = next_value
                else:
                    nextnonterminal = 1.0 - buffer.dones[t + 1]
                    next_return = returns[t + 1]
                returns[t] = buffer.rewards[t] + gamma * nextnonterminal * next_return
            advantages = returns - buffer.values

    # flatten the batch
    b_obs = buffer.obs.reshape((-1,) + env.observation_space.shape)
    b_logprobs = buffer.logprobs.reshape(-1)
    b_actions = buffer.actions.reshape((-1,) + env.action_space.shape)
    b_advantages = advantages.reshape(-1)
    b_returns = returns.reshape(-1)
    b_values = buffer.values.reshape(-1)

    # Optimizing the policy and value network
    b_inds = np.arange(batch_size)
    clipfracs = []
    for epoch in range(update_epochs):
        np.random.shuffle(b_inds)
        for start in range(0, batch_size, minibatch_size):
            end = start + minibatch_size
            mb_inds = b_inds[start:end]

            _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions[mb_inds])
            logratio = newlogprob - b_logprobs[mb_inds]
            ratio = logratio.exp()

            with torch.no_grad():
                # calculate approx_kl http://joschu.net/blog/kl-approx.html
                old_approx_kl = (-logratio).mean()
                approx_kl = ((ratio - 1) - logratio).mean()
                clipfracs += [((ratio - 1.0).abs() > clip_coef).float().mean().item()]

            mb_advantages = b_advantages[mb_inds]
            if norm_adv:
                mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)

            # Policy loss
            pg_loss1 = -mb_advantages * ratio
            pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef)
            pg_loss = torch.max(pg_loss1, pg_loss2).mean()

            # Value loss
            newvalue = newvalue.view(-1)
            if clip_vloss:
                v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
                v_clipped = b_values[mb_inds] + torch.clamp(
                    newvalue - b_values[mb_inds],
                    -clip_coef,
                    clip_coef,
                )
                v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
                v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
                v_loss = 0.5 * v_loss_max.mean()
            else:
                v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()

            entropy_loss = entropy.mean()
            loss = pg_loss - ent_coef * entropy_loss + v_loss * vf_coef

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(agent.parameters(), max_grad_norm)
            optimizer.step()

        if target_kl is not None:
            if approx_kl > target_kl:
                break

    y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
    var_y = np.var(y_true)
    explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y

# Training

## Helper Function

In [14]:
def initialize_agent(env, device, learning_rate):
    agent = Agent(env).to(device)
    optimizer = optim.Adam(agent.parameters(), lr=learning_rate, eps=1e-5)
    return agent, optimizer

def get_action_and_update_buffer(agent, obs, buffer, step):
    with torch.no_grad():
        action, logprob, _, value = agent.get_action_and_value(obs)
        buffer.values[step] = value.flatten()
    buffer.actions[step] = action
    buffer.logprobs[step] = logprob
    return action

def reset_environment(env, device):
    next_obs, infos = env.reset()
    next_obs = {k: torch.Tensor(v).unsqueeze(0).to(device) for k, v in next_obs.items()}
    return next_obs, infos

## Hyperparameters

In [15]:
exp_name = "Sender box"

learning_rate = 1e-5
seed = 1
# total_timesteps = 20000000
total_timesteps = 1000000
torch_deterministic = True
cuda = False
mps = False
track = False
wandb_project_name = "ppo-implementation-details"
wandb_entity = None
capture_video = False

# Algorithm-specific arguments
num_envs = 1
num_steps = 2048
anneal_lr = True
gae = True
gamma = 0.99
gae_lambda = 0.95
num_minibatches = 128
update_epochs = 10
norm_adv = True
clip_coef = 0.2
clip_vloss = True
ent_coef = 0.0
vf_coef = 0.5
max_grad_norm = 0.5
target_kl = None
store_freq = 20

# Calculate derived variables
batch_size = int(num_envs * num_steps)
minibatch_size = int(batch_size // num_minibatches)

In [17]:
torch.set_default_dtype(torch.float32)

run_name = f"{exp_name}__{seed}__{int(time.time())}"

writer = SummaryWriter(f"runs/{run_name}")

writer.add_text("environment/level_number", str(len(xml_files)), 0)
writer.add_text("environment/agents", ', '.join(agents), 0)
writer.add_text("hyperparameters/learning_rate", str(learning_rate), 0)
writer.add_text("hyperparameters/network_size", ', '.join(str(e) for e in [512, 256]), 0)
writer.add_text("hyperparameters/batch", str(minibatch_size), 0)

# TRY NOT TO MODIFY: seeding
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = torch_deterministic

device = torch.device("cuda" if torch.cuda.is_available() and cuda else "cpu")

obs, infos = env.reset()

sender, sender_optimizer = initialize_agent(env, device, learning_rate)
receiver, receiver_optimizer = initialize_agent(env, device, learning_rate)

buffer_sender = Buffer(num_steps, env, num_envs, device)
buffer_receiver = Buffer(num_steps, env, num_envs, device)

global_step = 0
start_time = time.time()
next_obs, infos = reset_environment(env, device)

next_done = {"sender": torch.zeros(num_envs).to(device), "receiver": torch.zeros(num_envs).to(device)}

num_updates = total_timesteps // batch_size
train_start = time.time()

epoch_lengths = []
current_length = 0

for update in progressbar(range(1, num_updates + 1), redirect_stdout=True):
    # Annealing the rate if instructed to do so.
    if anneal_lr:
        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = frac * learning_rate
        sender_optimizer.param_groups[0]["lr"] = lrnow
        receiver_optimizer.param_groups[0]["lr"] = lrnow
    
    epoch_rewards = {"sender":0, "receiver":0}
    current_rewards = {"sender":[], "receiver":[]}
    variances = {"sender":[], "receiver":[]}
    epoch_runs = 0
    episode_accuracies = 0
    episode_sendAccuracies = 0
    for step in range(0, num_steps):
        global_step += 1 * num_envs
        current_length += 1
        buffer_sender.obs[step] = next_obs["sender"]
        buffer_receiver.obs[step] = next_obs["receiver"]


        sender_action = get_action_and_update_buffer(sender, next_obs["sender"], buffer_sender, step)
        receiver_action = get_action_and_update_buffer(receiver, next_obs["receiver"], buffer_receiver, step)

        next_obs, reward, terminations, truncations, info = env.step({"sender": sender_action.cpu().numpy()[0], "receiver": receiver_action.cpu().numpy()[0]})
        current_rewards["sender"].append(reward["sender"])
        current_rewards["receiver"].append(reward["receiver"])
        next_obs = {"sender": torch.Tensor(next_obs["sender"]).unsqueeze(0).to(device), "receiver": torch.Tensor(next_obs["receiver"]).unsqueeze(0).to(device)}

        if terminations["sender"] or terminations["receiver"] or truncations["sender"] or truncations["receiver"]:
            next_obs, infos = reset_environment(env, device)
            epoch_rewards["sender"] += sum(current_rewards["sender"])
            epoch_rewards["receiver"] += sum(current_rewards["receiver"])

            epoch_lengths.append(current_length)
            current_length = 0

            dynamic = env.env.env.environment_dynamics[3]

            if len(dynamic.sendAccuracies) > 512:
                episode_sendAccuracies = sum(dynamic.sendAccuracies[-512:]) / 512
                del dynamic.sendAccuracies[:-513]
                writer.add_scalar("charts/sender/accuracies", episode_sendAccuracies, global_step)

            if len(dynamic.accuracies) > 4:
                window = min(15, len(dynamic.accuracies))
                episode_accuracies = sum(dynamic.accuracies[-1 * window:]) / window
                writer.add_scalar("charts/receiver/accuracies", episode_accuracies, global_step)
                if window == 15:
                    del dynamic.accuracies[:-16]

            if len(dynamic.variances) > 4:
                window = min(15, len(dynamic.variances))
                current_variance = sum(dynamic.variances[-1 * window:]) / window
                writer.add_scalar("charts/receiver_variance", current_variance, global_step)
                if window == 15:
                    del dynamic.variances[:-16]

            if len(epoch_lengths) > 3:
                window = min(10, len(epoch_lengths))
                epoch_length = sum(epoch_lengths[-1 * window:]) / window
                writer.add_scalar("charts/episodic_length", epoch_length, global_step)
                if window == 10:
                    del epoch_lengths[:-11]
            epoch_runs += 1
        
        buffer_sender.rewards[step] = torch.tensor(reward["sender"]).to(device).view(-1)
        buffer_receiver.rewards[step] = torch.tensor(reward["receiver"]).to(device).view(-1)
        next_done = {"sender": torch.Tensor([terminations["sender"]]).to(device), "receiver": torch.Tensor([terminations["receiver"]]).to(device)}
    if update % store_freq == 0:
        torch.save(sender, "models/model" + str(start_time) + ".pth")
        torch.save(receiver, "models/model" + str(start_time) + ".pth")

    update_agent(sender, buffer_sender, sender_optimizer, next_obs["sender"], next_done["sender"], env, batch_size, update_epochs, minibatch_size, clip_coef, vf_coef, ent_coef, max_grad_norm, target_kl, clip_vloss, norm_adv, gae_lambda, gae, gamma, device, step)
    update_agent(receiver, buffer_receiver, receiver_optimizer, next_obs["receiver"], next_done["receiver"], env, batch_size, update_epochs, minibatch_size, clip_coef, vf_coef, ent_coef, max_grad_norm, target_kl, clip_vloss, norm_adv, gae_lambda, gae, gamma, device, step)

    writer.add_scalar("charts/learning_rate", sender_optimizer.param_groups[0]["lr"], global_step)
    writer.add_scalar("charts/sender/episodic_return", epoch_rewards["sender"] / epoch_runs, global_step)
    writer.add_scalar("charts/receiver/episodic_return", epoch_rewards["receiver"] / epoch_runs, global_step)
    print("SPS:", int(global_step / (time.time() - start_time)), "Average Reward:", epoch_rewards["sender"] / epoch_runs)
    writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)

torch.save(sender, "models/model" + str(start_time) + ".pth")
torch.save(receiver, "models/model" + str(start_time) + ".pth")
writer.close()

  0% (0 of 488) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--


SPS: 69 Average Reward: -21.402849167364266


  0% (1 of 488) |                        | Elapsed Time: 0:00:29 ETA:   4:00:47


SPS: 73 Average Reward: -10.698389484964654


  0% (2 of 488) |                        | Elapsed Time: 0:00:55 ETA:   3:29:46


SPS: 75 Average Reward: -10.883122260329063


  0% (3 of 488) |                        | Elapsed Time: 0:01:20 ETA:   3:25:26


SPS: 76 Average Reward: -10.502174614277099


  0% (4 of 488) |                        | Elapsed Time: 0:01:46 ETA:   3:26:05


SPS: 77 Average Reward: -10.577646543409038


  1% (5 of 488) |                        | Elapsed Time: 0:02:11 ETA:   3:24:25


SPS: 78 Average Reward: -10.761168199055467


  1% (6 of 488) |                        | Elapsed Time: 0:02:37 ETA:   3:25:01


SPS: 78 Average Reward: -10.47959500819738


  1% (7 of 488) |                        | Elapsed Time: 0:03:02 ETA:   3:21:42


SPS: 78 Average Reward: -11.351008226177058


  1% (8 of 488) |                        | Elapsed Time: 0:03:28 ETA:   3:23:29


SPS: 79 Average Reward: -10.914489566813891


  1% (9 of 488) |                        | Elapsed Time: 0:03:53 ETA:   3:20:29


SPS: 79 Average Reward: -10.579219104138666


  2% (10 of 488) |                       | Elapsed Time: 0:04:18 ETA:   3:22:49


SPS: 79 Average Reward: -10.334683100041154


  2% (11 of 488) |                       | Elapsed Time: 0:04:44 ETA:   3:22:12


SPS: 79 Average Reward: -11.152880037409814


  2% (12 of 488) |                       | Elapsed Time: 0:05:09 ETA:   3:22:03


SPS: 79 Average Reward: -10.142538219656108


  2% (13 of 488) |                       | Elapsed Time: 0:05:34 ETA:   3:20:27


SPS: 79 Average Reward: -9.959650946824503


  2% (14 of 488) |                       | Elapsed Time: 0:06:00 ETA:   3:20:32


SPS: 79 Average Reward: -10.474531780867462


  3% (15 of 488) |                       | Elapsed Time: 0:06:25 ETA:   3:19:28


SPS: 79 Average Reward: -9.933603193617023


  3% (16 of 488) |                       | Elapsed Time: 0:06:51 ETA:   3:22:37


SPS: 79 Average Reward: -10.890004312295018


  3% (17 of 488) |                       | Elapsed Time: 0:07:16 ETA:   3:19:14


SPS: 79 Average Reward: -10.47089198918784


  3% (18 of 488) |                       | Elapsed Time: 0:07:42 ETA:   3:20:54


SPS: 79 Average Reward: -10.350919172348934


  3% (19 of 488) |                       | Elapsed Time: 0:08:07 ETA:   3:17:16


SPS: 79 Average Reward: -9.879150254610835


  4% (20 of 488) |                       | Elapsed Time: 0:08:33 ETA:   3:19:23


SPS: 79 Average Reward: -10.1137585897501


  4% (21 of 488) |                       | Elapsed Time: 0:08:58 ETA:   3:17:33


SPS: 79 Average Reward: -10.53877992519145


  4% (22 of 488) |#                      | Elapsed Time: 0:09:24 ETA:   3:18:21


SPS: 79 Average Reward: -11.326275804453918


  4% (23 of 488) |#                      | Elapsed Time: 0:09:49 ETA:   3:15:59


SPS: 79 Average Reward: -11.198265384936299


  4% (24 of 488) |#                      | Elapsed Time: 0:10:14 ETA:   3:16:52


SPS: 79 Average Reward: -10.161693275175722


  5% (25 of 488) |#                      | Elapsed Time: 0:10:40 ETA:   3:20:13


SPS: 79 Average Reward: -11.728046947841676


  5% (26 of 488) |#                      | Elapsed Time: 0:11:06 ETA:   3:15:40


SPS: 79 Average Reward: -11.39858994140274


  5% (27 of 488) |#                      | Elapsed Time: 0:11:31 ETA:   3:12:32


SPS: 80 Average Reward: -9.495085222786724


  5% (28 of 488) |#                      | Elapsed Time: 0:11:56 ETA:   3:14:41


SPS: 80 Average Reward: -11.139228523350516


  5% (29 of 488) |#                      | Elapsed Time: 0:12:22 ETA:   3:15:15


SPS: 80 Average Reward: -21.32112573614406


  6% (30 of 488) |#                      | Elapsed Time: 0:12:47 ETA:   3:10:22


SPS: 80 Average Reward: -27.85507888098484


  6% (31 of 488) |#                      | Elapsed Time: 0:13:12 ETA:   3:12:42


SPS: 80 Average Reward: -25.41796093525972


  6% (32 of 488) |#                      | Elapsed Time: 0:13:37 ETA:   3:13:19


SPS: 80 Average Reward: -23.197978561162813


  6% (33 of 488) |#                      | Elapsed Time: 0:14:03 ETA:   3:12:10


SPS: 80 Average Reward: -24.457453312100526


  6% (34 of 488) |#                      | Elapsed Time: 0:14:28 ETA:   3:10:58


SPS: 80 Average Reward: -24.160699151072258


  7% (35 of 488) |#                      | Elapsed Time: 0:14:53 ETA:   3:12:54


SPS: 80 Average Reward: -22.314837756779063


  7% (36 of 488) |#                      | Elapsed Time: 0:15:19 ETA:   3:11:48


SPS: 80 Average Reward: -23.630039977836518


  7% (37 of 488) |#                      | Elapsed Time: 0:15:44 ETA:   3:11:25


SPS: 80 Average Reward: -25.046174535451186


  7% (38 of 488) |#                      | Elapsed Time: 0:16:10 ETA:   3:10:00


SPS: 80 Average Reward: -22.072635204650467


  7% (39 of 488) |#                      | Elapsed Time: 0:16:35 ETA:   3:11:19


SPS: 80 Average Reward: -18.92851208891713


  8% (40 of 488) |#                      | Elapsed Time: 0:17:03 ETA:   3:29:11


SPS: 79 Average Reward: -18.0437976079799


  8% (41 of 488) |#                      | Elapsed Time: 0:17:30 ETA:   3:19:39


SPS: 79 Average Reward: -19.734297514233816


  8% (42 of 488) |#                      | Elapsed Time: 0:17:57 ETA:   3:19:09


SPS: 79 Average Reward: -21.297956424784214


  8% (43 of 488) |##                     | Elapsed Time: 0:18:23 ETA:   3:15:03


SPS: 79 Average Reward: -18.45910426547281


  9% (44 of 488) |##                     | Elapsed Time: 0:18:49 ETA:   3:14:44


SPS: 79 Average Reward: -16.92371939366536


  9% (45 of 488) |##                     | Elapsed Time: 0:19:16 ETA:   3:12:06


SPS: 79 Average Reward: -10.957537925434353


  9% (46 of 488) |##                     | Elapsed Time: 0:19:42 ETA:   3:13:42


SPS: 79 Average Reward: -16.25550307257537


  9% (47 of 488) |##                     | Elapsed Time: 0:20:08 ETA:   3:11:45


SPS: 79 Average Reward: -22.68007882932438


  9% (48 of 488) |##                     | Elapsed Time: 0:20:34 ETA:   3:12:15


SPS: 79 Average Reward: -21.045077920331988


 10% (49 of 488) |##                     | Elapsed Time: 0:21:00 ETA:   3:11:52


SPS: 79 Average Reward: -20.8593066927096


 10% (50 of 488) |##                     | Elapsed Time: 0:21:27 ETA:   3:11:40


SPS: 79 Average Reward: -20.47932796215015


 10% (51 of 488) |##                     | Elapsed Time: 0:21:53 ETA:   3:10:14


SPS: 79 Average Reward: -17.315842884378135


 10% (52 of 488) |##                     | Elapsed Time: 0:22:19 ETA:   3:10:00


SPS: 79 Average Reward: -17.12718015352596


 10% (53 of 488) |##                     | Elapsed Time: 0:22:45 ETA:   3:10:03


SPS: 79 Average Reward: -12.218091111651647


 11% (54 of 488) |##                     | Elapsed Time: 0:23:11 ETA:   3:09:01


SPS: 79 Average Reward: -16.5737643137921


 11% (55 of 488) |##                     | Elapsed Time: 0:23:38 ETA:   3:09:48


SPS: 79 Average Reward: -24.111954442889573


 11% (56 of 488) |##                     | Elapsed Time: 0:24:04 ETA:   3:07:18


SPS: 79 Average Reward: -11.879696973053262


 11% (57 of 488) |##                     | Elapsed Time: 0:24:30 ETA:   3:08:37


SPS: 79 Average Reward: -20.21109648246531


 11% (58 of 488) |##                     | Elapsed Time: 0:24:56 ETA:   3:07:26


SPS: 79 Average Reward: -12.172337983757663


 12% (59 of 488) |##                     | Elapsed Time: 0:25:22 ETA:   3:06:21


SPS: 79 Average Reward: -10.747591781552387


 12% (60 of 488) |##                     | Elapsed Time: 0:25:48 ETA:   3:07:41


SPS: 79 Average Reward: -15.84645544570436


 12% (61 of 488) |##                     | Elapsed Time: 0:26:15 ETA:   3:06:31


SPS: 79 Average Reward: -17.924605326708875


 12% (62 of 488) |##                     | Elapsed Time: 0:26:41 ETA:   3:05:17


SPS: 79 Average Reward: -11.906230762121481


 12% (63 of 488) |##                     | Elapsed Time: 0:27:07 ETA:   3:05:42


SPS: 79 Average Reward: -7.914220241634613


 13% (64 of 488) |###                    | Elapsed Time: 0:27:33 ETA:   3:05:21


SPS: 79 Average Reward: -8.298650122851008


 13% (65 of 488) |###                    | Elapsed Time: 0:27:59 ETA:   3:06:19


SPS: 79 Average Reward: -14.445058988761382


 13% (66 of 488) |###                    | Elapsed Time: 0:28:25 ETA:   3:01:58


SPS: 79 Average Reward: -17.54290429595998


 13% (67 of 488) |###                    | Elapsed Time: 0:28:52 ETA:   3:05:09


SPS: 79 Average Reward: -13.917034770893993


 13% (68 of 488) |###                    | Elapsed Time: 0:29:18 ETA:   3:03:22


SPS: 79 Average Reward: -13.038045256449742


 14% (69 of 488) |###                    | Elapsed Time: 0:29:44 ETA:   3:03:22


SPS: 79 Average Reward: -12.96237703072425


 14% (70 of 488) |###                    | Elapsed Time: 0:30:10 ETA:   3:02:25


SPS: 79 Average Reward: -9.496543893538096


 14% (71 of 488) |###                    | Elapsed Time: 0:30:37 ETA:   3:01:48


SPS: 79 Average Reward: -10.563489235111517


 14% (72 of 488) |###                    | Elapsed Time: 0:31:03 ETA:   3:01:44


SPS: 79 Average Reward: -16.130911664065685


 14% (73 of 488) |###                    | Elapsed Time: 0:31:29 ETA:   2:59:47


SPS: 79 Average Reward: -16.131473951257025


 15% (74 of 488) |###                    | Elapsed Time: 0:31:55 ETA:   3:01:06


SPS: 79 Average Reward: -17.76894034446306


 15% (75 of 488) |###                    | Elapsed Time: 0:32:21 ETA:   3:00:43


SPS: 79 Average Reward: -20.25255545610023


 15% (76 of 488) |###                    | Elapsed Time: 0:32:47 ETA:   2:59:26


SPS: 79 Average Reward: -13.552829547877256


 15% (77 of 488) |###                    | Elapsed Time: 0:33:13 ETA:   2:58:01


SPS: 79 Average Reward: -16.3398345639483


 15% (78 of 488) |###                    | Elapsed Time: 0:33:40 ETA:   2:58:58


SPS: 79 Average Reward: -12.415059650214575


 16% (79 of 488) |###                    | Elapsed Time: 0:34:06 ETA:   3:00:02


SPS: 79 Average Reward: -12.276517194972278


 16% (80 of 488) |###                    | Elapsed Time: 0:34:32 ETA:   2:58:17


SPS: 79 Average Reward: -15.06842951983488


 16% (81 of 488) |###                    | Elapsed Time: 0:34:59 ETA:   2:58:26


SPS: 79 Average Reward: -18.942076592720976


 16% (82 of 488) |###                    | Elapsed Time: 0:35:25 ETA:   2:58:44


SPS: 78 Average Reward: -19.200578370196606


 17% (83 of 488) |###                    | Elapsed Time: 0:35:51 ETA:   2:57:41


SPS: 78 Average Reward: -17.77053768985551


 17% (84 of 488) |###                    | Elapsed Time: 0:36:17 ETA:   2:56:13


SPS: 78 Average Reward: -14.120032235994035


 17% (85 of 488) |####                   | Elapsed Time: 0:36:43 ETA:   2:54:15


SPS: 78 Average Reward: -10.593796082269614


 17% (86 of 488) |####                   | Elapsed Time: 0:37:09 ETA:   2:53:20


SPS: 78 Average Reward: -13.519048129731205


 17% (87 of 488) |####                   | Elapsed Time: 0:37:36 ETA:   2:56:06


SPS: 78 Average Reward: -19.965427781778068


 18% (88 of 488) |####                   | Elapsed Time: 0:38:02 ETA:   2:54:25


SPS: 78 Average Reward: -12.250514698868898


 18% (89 of 488) |####                   | Elapsed Time: 0:38:28 ETA:   2:54:02


SPS: 78 Average Reward: -18.18392260685477


 18% (90 of 488) |####                   | Elapsed Time: 0:38:54 ETA:   2:51:51


SPS: 79 Average Reward: -13.838666242395187


 18% (91 of 488) |####                   | Elapsed Time: 0:39:18 ETA:   2:41:35


SPS: 79 Average Reward: -19.009275659762007


 18% (92 of 488) |####                   | Elapsed Time: 0:39:43 ETA:   2:41:13


SPS: 79 Average Reward: -19.400083536001684


 19% (93 of 488) |####                   | Elapsed Time: 0:40:07 ETA:   2:39:48


SPS: 79 Average Reward: -13.338895219498653


 19% (94 of 488) |####                   | Elapsed Time: 0:40:31 ETA:   2:40:09


SPS: 79 Average Reward: -12.707403576226703


 19% (95 of 488) |####                   | Elapsed Time: 0:40:56 ETA:   2:39:24


SPS: 79 Average Reward: -11.723402400673304


 19% (96 of 488) |####                   | Elapsed Time: 0:41:20 ETA:   2:40:14


SPS: 79 Average Reward: -18.47949827980288


 19% (97 of 488) |####                   | Elapsed Time: 0:41:45 ETA:   2:40:09


SPS: 79 Average Reward: -16.050229291256105


 20% (98 of 488) |####                   | Elapsed Time: 0:42:09 ETA:   2:38:14


SPS: 79 Average Reward: -14.149422170837061


 20% (99 of 488) |####                   | Elapsed Time: 0:42:33 ETA:   2:37:40


SPS: 79 Average Reward: -17.345473011001175


 20% (100 of 488) |####                  | Elapsed Time: 0:42:58 ETA:   2:37:40


SPS: 79 Average Reward: -16.111156708621436


 20% (101 of 488) |####                  | Elapsed Time: 0:43:22 ETA:   2:37:16


SPS: 79 Average Reward: -14.300665541014206


 20% (102 of 488) |####                  | Elapsed Time: 0:43:47 ETA:   2:36:37


SPS: 79 Average Reward: -10.544998387838255


 21% (103 of 488) |####                  | Elapsed Time: 0:44:11 ETA:   2:35:53


SPS: 79 Average Reward: -12.077160065976056


 21% (104 of 488) |####                  | Elapsed Time: 0:44:35 ETA:   2:37:25


SPS: 79 Average Reward: -11.831289197374522


 21% (105 of 488) |####                  | Elapsed Time: 0:45:00 ETA:   2:35:49


SPS: 79 Average Reward: -16.48306529073408


 21% (106 of 488) |####                  | Elapsed Time: 0:45:24 ETA:   2:36:14


SPS: 79 Average Reward: -13.648004843308968


 21% (107 of 488) |####                  | Elapsed Time: 0:45:49 ETA:   2:35:06


SPS: 79 Average Reward: -15.25362536176685


 22% (108 of 488) |####                  | Elapsed Time: 0:46:13 ETA:   2:34:08


SPS: 79 Average Reward: -12.486678189079132


 22% (109 of 488) |####                  | Elapsed Time: 0:46:38 ETA:   2:33:53


SPS: 79 Average Reward: -10.915727306083264


 22% (110 of 488) |####                  | Elapsed Time: 0:47:02 ETA:   2:32:40


SPS: 79 Average Reward: -13.809790217490114


 22% (111 of 488) |#####                 | Elapsed Time: 0:47:26 ETA:   2:33:07


SPS: 79 Average Reward: -10.913440002943432


 22% (112 of 488) |#####                 | Elapsed Time: 0:47:51 ETA:   2:34:10


SPS: 79 Average Reward: -14.41434308817573


 23% (113 of 488) |#####                 | Elapsed Time: 0:48:15 ETA:   2:32:20


SPS: 79 Average Reward: -13.1855469885212


 23% (114 of 488) |#####                 | Elapsed Time: 0:48:40 ETA:   2:32:51


SPS: 79 Average Reward: -12.251851363879533


 23% (115 of 488) |#####                 | Elapsed Time: 0:49:04 ETA:   2:31:28


SPS: 80 Average Reward: -13.166505239690132


 23% (116 of 488) |#####                 | Elapsed Time: 0:49:28 ETA:   2:30:54


SPS: 80 Average Reward: -11.462833226278315


 23% (117 of 488) |#####                 | Elapsed Time: 0:49:53 ETA:   2:31:22


SPS: 80 Average Reward: -14.229632621381851


 24% (118 of 488) |#####                 | Elapsed Time: 0:50:17 ETA:   2:30:24


SPS: 80 Average Reward: -11.788167679611949


 24% (119 of 488) |#####                 | Elapsed Time: 0:50:42 ETA:   2:29:39


SPS: 80 Average Reward: -14.270398063958277


 24% (120 of 488) |#####                 | Elapsed Time: 0:51:06 ETA:   2:29:50


SPS: 80 Average Reward: -10.519570593894699


 24% (121 of 488) |#####                 | Elapsed Time: 0:51:31 ETA:   2:30:10


SPS: 80 Average Reward: -11.821842866975354


 25% (122 of 488) |#####                 | Elapsed Time: 0:51:55 ETA:   2:28:45


SPS: 80 Average Reward: -8.088668302021746


 25% (123 of 488) |#####                 | Elapsed Time: 0:52:19 ETA:   2:28:26


SPS: 80 Average Reward: -7.574297121725365


 25% (124 of 488) |#####                 | Elapsed Time: 0:52:44 ETA:   2:28:36


SPS: 80 Average Reward: -6.931462875472998


 25% (125 of 488) |#####                 | Elapsed Time: 0:53:08 ETA:   2:28:23


SPS: 80 Average Reward: -11.481559409516066


 25% (126 of 488) |#####                 | Elapsed Time: 0:53:33 ETA:   2:28:09


SPS: 80 Average Reward: -14.766126994054233


 26% (127 of 488) |#####                 | Elapsed Time: 0:53:57 ETA:   2:27:48


SPS: 80 Average Reward: -11.838753629840852


 26% (128 of 488) |#####                 | Elapsed Time: 0:54:22 ETA:   2:26:30


SPS: 80 Average Reward: -13.320312780499567


 26% (129 of 488) |#####                 | Elapsed Time: 0:54:46 ETA:   2:27:13


SPS: 80 Average Reward: -13.931359595403


 26% (130 of 488) |#####                 | Elapsed Time: 0:55:11 ETA:   2:26:12


SPS: 80 Average Reward: -10.843215289826981


 26% (131 of 488) |#####                 | Elapsed Time: 0:55:35 ETA:   2:25:15


SPS: 80 Average Reward: -13.060363045647362


 27% (132 of 488) |#####                 | Elapsed Time: 0:56:00 ETA:   2:25:20


SPS: 80 Average Reward: -11.545055176758504


 27% (133 of 488) |#####                 | Elapsed Time: 0:56:24 ETA:   2:25:17


SPS: 80 Average Reward: -11.679049286565094


 27% (134 of 488) |######                | Elapsed Time: 0:56:49 ETA:   2:25:20


SPS: 80 Average Reward: -13.560130396312482


 27% (135 of 488) |######                | Elapsed Time: 0:57:14 ETA:   2:24:17


SPS: 80 Average Reward: -13.553482336107809


 27% (136 of 488) |######                | Elapsed Time: 0:57:38 ETA:   2:23:26


SPS: 80 Average Reward: -11.971224320644101


 28% (137 of 488) |######                | Elapsed Time: 0:58:03 ETA:   2:23:36


SPS: 80 Average Reward: -10.730175822323865


 28% (138 of 488) |######                | Elapsed Time: 0:58:27 ETA:   2:22:10


SPS: 80 Average Reward: -11.711911181882646


 28% (139 of 488) |######                | Elapsed Time: 0:58:51 ETA:   2:22:26


SPS: 80 Average Reward: -11.286394310113616


 28% (140 of 488) |######                | Elapsed Time: 0:59:16 ETA:   2:21:47


SPS: 80 Average Reward: -10.182255766470215


 28% (141 of 488) |######                | Elapsed Time: 0:59:40 ETA:   2:21:22


SPS: 80 Average Reward: -14.021840017882742


 29% (142 of 488) |######                | Elapsed Time: 1:00:05 ETA:   2:21:35


SPS: 80 Average Reward: -11.668626382989386


 29% (143 of 488) |######                | Elapsed Time: 1:00:29 ETA:   2:20:16


SPS: 80 Average Reward: -11.140149264527825


 29% (144 of 488) |######                | Elapsed Time: 1:00:54 ETA:   2:20:10


SPS: 80 Average Reward: -11.764561337235122


 29% (145 of 488) |######                | Elapsed Time: 1:01:18 ETA:   2:19:52


SPS: 80 Average Reward: -12.040425098719354


 29% (146 of 488) |######                | Elapsed Time: 1:01:43 ETA:   2:19:23


SPS: 80 Average Reward: -10.414680679498142


 30% (147 of 488) |######                | Elapsed Time: 1:02:07 ETA:   2:19:30


SPS: 80 Average Reward: -12.82476609629429


 30% (148 of 488) |######                | Elapsed Time: 1:02:32 ETA:   2:19:23


SPS: 80 Average Reward: -10.53343596761457


 30% (149 of 488) |######                | Elapsed Time: 1:02:56 ETA:   2:18:06


SPS: 80 Average Reward: -11.001751404799766


 30% (150 of 488) |######                | Elapsed Time: 1:03:21 ETA:   2:17:18


SPS: 80 Average Reward: -14.143316823170153


 30% (151 of 488) |######                | Elapsed Time: 1:03:45 ETA:   2:17:52


SPS: 80 Average Reward: -11.14090830412924


 31% (152 of 488) |######                | Elapsed Time: 1:04:10 ETA:   2:16:46


SPS: 80 Average Reward: -12.044460811592899


 31% (153 of 488) |######                | Elapsed Time: 1:04:34 ETA:   2:16:37


SPS: 80 Average Reward: -11.23955074993042


 31% (154 of 488) |######                | Elapsed Time: 1:04:59 ETA:   2:16:14


SPS: 80 Average Reward: -11.742867331843183


 31% (155 of 488) |######                | Elapsed Time: 1:05:23 ETA:   2:15:56


SPS: 80 Average Reward: -12.149728723970519


 31% (156 of 488) |#######               | Elapsed Time: 1:05:48 ETA:   2:15:24


SPS: 80 Average Reward: -12.17748737696777


 32% (157 of 488) |#######               | Elapsed Time: 1:06:12 ETA:   2:14:55


SPS: 80 Average Reward: -11.291318442716294


 32% (158 of 488) |#######               | Elapsed Time: 1:06:36 ETA:   2:13:14


SPS: 80 Average Reward: -11.681043114860088


 32% (159 of 488) |#######               | Elapsed Time: 1:07:00 ETA:   2:10:59


SPS: 81 Average Reward: -12.485161885551582


 32% (160 of 488) |#######               | Elapsed Time: 1:07:24 ETA:   2:12:17


SPS: 81 Average Reward: -10.778550754121124


 32% (161 of 488) |#######               | Elapsed Time: 1:07:49 ETA:   2:12:41


SPS: 81 Average Reward: -13.28788652306643


 33% (162 of 488) |#######               | Elapsed Time: 1:08:13 ETA:   2:12:01


SPS: 81 Average Reward: -12.203469722499248


 33% (163 of 488) |#######               | Elapsed Time: 1:08:37 ETA:   2:11:48


SPS: 81 Average Reward: -10.32501861370644


 33% (164 of 488) |#######               | Elapsed Time: 1:09:02 ETA:   2:10:51


SPS: 81 Average Reward: -10.298205118725498


 33% (165 of 488) |#######               | Elapsed Time: 1:09:26 ETA:   2:11:23


SPS: 81 Average Reward: -10.63106882996433


 34% (166 of 488) |#######               | Elapsed Time: 1:09:50 ETA:   2:09:05


SPS: 81 Average Reward: -10.277707695162135


 34% (167 of 488) |#######               | Elapsed Time: 1:10:14 ETA:   2:08:15


SPS: 81 Average Reward: -10.071223619125762


 34% (168 of 488) |#######               | Elapsed Time: 1:10:38 ETA:   2:09:48


SPS: 81 Average Reward: -8.736326118936997


 34% (169 of 488) |#######               | Elapsed Time: 1:11:03 ETA:   2:08:51


SPS: 81 Average Reward: -12.614301409638545


 34% (170 of 488) |#######               | Elapsed Time: 1:11:27 ETA:   2:08:58


SPS: 81 Average Reward: -11.874014135200976


 35% (171 of 488) |#######               | Elapsed Time: 1:11:51 ETA:   2:08:32


SPS: 81 Average Reward: -9.62166523773738


 35% (172 of 488) |#######               | Elapsed Time: 1:12:15 ETA:   2:07:27


SPS: 81 Average Reward: -13.07476538761986


 35% (173 of 488) |#######               | Elapsed Time: 1:12:40 ETA:   2:07:24


SPS: 81 Average Reward: -11.290757053053184


 35% (174 of 488) |#######               | Elapsed Time: 1:13:04 ETA:   2:06:43


SPS: 81 Average Reward: -9.96610599204149


 35% (175 of 488) |#######               | Elapsed Time: 1:13:28 ETA:   2:05:45


SPS: 81 Average Reward: -10.8701041481098


 36% (176 of 488) |#######               | Elapsed Time: 1:13:52 ETA:   2:06:07


SPS: 81 Average Reward: -10.609369094425432


 36% (177 of 488) |#######               | Elapsed Time: 1:14:16 ETA:   2:05:46


SPS: 81 Average Reward: -9.419802313822132


 36% (178 of 488) |########              | Elapsed Time: 1:14:41 ETA:   2:04:24


SPS: 81 Average Reward: -9.973305791482678


 36% (179 of 488) |########              | Elapsed Time: 1:15:05 ETA:   2:04:31


SPS: 81 Average Reward: -10.72075348556624


 36% (180 of 488) |########              | Elapsed Time: 1:15:29 ETA:   2:04:01


SPS: 81 Average Reward: -11.650185564401951


 37% (181 of 488) |########              | Elapsed Time: 1:15:53 ETA:   2:03:34


SPS: 81 Average Reward: -11.867389796101115


 37% (182 of 488) |########              | Elapsed Time: 1:16:17 ETA:   2:03:28


SPS: 81 Average Reward: -13.972413224699803


 37% (183 of 488) |########              | Elapsed Time: 1:16:42 ETA:   2:03:09


SPS: 81 Average Reward: -10.890803913443461


 37% (184 of 488) |########              | Elapsed Time: 1:17:06 ETA:   2:02:38


SPS: 81 Average Reward: -11.585788561503911


 37% (185 of 488) |########              | Elapsed Time: 1:17:30 ETA:   2:01:42


SPS: 81 Average Reward: -9.762618971411282


 38% (186 of 488) |########              | Elapsed Time: 1:17:54 ETA:   2:01:15


SPS: 81 Average Reward: -9.963144634197702


 38% (187 of 488) |########              | Elapsed Time: 1:18:18 ETA:   2:01:30


SPS: 81 Average Reward: -9.288329112183678


 38% (188 of 488) |########              | Elapsed Time: 1:18:42 ETA:   2:00:54


SPS: 81 Average Reward: -9.098629075404183


 38% (189 of 488) |########              | Elapsed Time: 1:19:06 ETA:   2:00:17


SPS: 81 Average Reward: -10.056864248684782


 38% (190 of 488) |########              | Elapsed Time: 1:19:30 ETA:   1:58:41


SPS: 81 Average Reward: -9.562667321192562


 39% (191 of 488) |########              | Elapsed Time: 1:19:54 ETA:   1:57:44


SPS: 81 Average Reward: -9.751950338817517


 39% (192 of 488) |########              | Elapsed Time: 1:20:18 ETA:   1:57:53


SPS: 81 Average Reward: -10.323539422087423


 39% (193 of 488) |########              | Elapsed Time: 1:20:42 ETA:   1:58:46


SPS: 81 Average Reward: -9.738266084558777


 39% (194 of 488) |########              | Elapsed Time: 1:21:06 ETA:   1:58:24


SPS: 81 Average Reward: -9.752195450503027


 39% (195 of 488) |########              | Elapsed Time: 1:21:31 ETA:   1:59:04


SPS: 81 Average Reward: -13.775038632857731


 40% (196 of 488) |########              | Elapsed Time: 1:21:55 ETA:   1:57:15


SPS: 81 Average Reward: -10.534996015364985


 40% (197 of 488) |########              | Elapsed Time: 1:22:19 ETA:   1:58:20


SPS: 81 Average Reward: -6.033510279846951


 40% (198 of 488) |########              | Elapsed Time: 1:22:46 ETA:   2:11:34


SPS: 81 Average Reward: -8.489567907658122


 40% (199 of 488) |########              | Elapsed Time: 1:23:12 ETA:   2:03:09


SPS: 81 Average Reward: -9.012704255747913


 40% (200 of 488) |#########             | Elapsed Time: 1:23:37 ETA:   2:00:13


SPS: 81 Average Reward: -11.453843216794494


 41% (201 of 488) |#########             | Elapsed Time: 1:24:02 ETA:   2:00:10


SPS: 81 Average Reward: -9.419499265554467


 41% (202 of 488) |#########             | Elapsed Time: 1:24:27 ETA:   1:59:33


SPS: 81 Average Reward: -10.03494378520773


 41% (203 of 488) |#########             | Elapsed Time: 1:24:52 ETA:   1:58:46


SPS: 81 Average Reward: -9.057791612767003


 41% (204 of 488) |#########             | Elapsed Time: 1:25:17 ETA:   1:59:08


SPS: 81 Average Reward: -8.911093314768436


 42% (205 of 488) |#########             | Elapsed Time: 1:25:42 ETA:   1:57:58


SPS: 81 Average Reward: -11.503381203067905


 42% (206 of 488) |#########             | Elapsed Time: 1:26:08 ETA:   1:57:49


SPS: 81 Average Reward: -10.166716648102922


 42% (207 of 488) |#########             | Elapsed Time: 1:26:33 ETA:   1:57:50


SPS: 81 Average Reward: -10.47208602639438


 42% (208 of 488) |#########             | Elapsed Time: 1:26:58 ETA:   1:57:10


SPS: 81 Average Reward: -8.94687427010946


 42% (209 of 488) |#########             | Elapsed Time: 1:27:23 ETA:   1:56:37


SPS: 81 Average Reward: -8.134755207520241


 43% (210 of 488) |#########             | Elapsed Time: 1:27:48 ETA:   1:55:35


SPS: 81 Average Reward: -8.653692382343694


 43% (211 of 488) |#########             | Elapsed Time: 1:28:13 ETA:   1:55:14


SPS: 81 Average Reward: -9.125320196282985


 43% (212 of 488) |#########             | Elapsed Time: 1:28:38 ETA:   1:55:23


SPS: 81 Average Reward: -12.300560876842317


 43% (213 of 488) |#########             | Elapsed Time: 1:29:03 ETA:   1:55:35


SPS: 81 Average Reward: -8.035500310801293


 43% (214 of 488) |#########             | Elapsed Time: 1:29:28 ETA:   1:53:47


SPS: 81 Average Reward: -11.638617203012847


 44% (215 of 488) |#########             | Elapsed Time: 1:29:53 ETA:   1:54:07


SPS: 81 Average Reward: -9.790545315858031


 44% (216 of 488) |#########             | Elapsed Time: 1:30:18 ETA:   1:53:42


SPS: 81 Average Reward: -9.833016736159315


 44% (217 of 488) |#########             | Elapsed Time: 1:30:43 ETA:   1:52:48


SPS: 81 Average Reward: -9.989488186756262


 44% (218 of 488) |#########             | Elapsed Time: 1:31:08 ETA:   1:52:48


SPS: 81 Average Reward: -9.032112364542554


 44% (219 of 488) |#########             | Elapsed Time: 1:31:33 ETA:   1:52:32


SPS: 81 Average Reward: -9.702846264305307


 45% (220 of 488) |#########             | Elapsed Time: 1:31:59 ETA:   1:52:28


SPS: 81 Average Reward: -9.134241021590432


 45% (221 of 488) |#########             | Elapsed Time: 1:32:24 ETA:   1:51:43


SPS: 81 Average Reward: -10.013735327389853


 45% (222 of 488) |##########            | Elapsed Time: 1:32:49 ETA:   1:50:52


SPS: 81 Average Reward: -9.6247566289983


 45% (223 of 488) |##########            | Elapsed Time: 1:33:14 ETA:   1:50:12


SPS: 81 Average Reward: -8.452665869473027


 45% (224 of 488) |##########            | Elapsed Time: 1:33:39 ETA:   1:50:26


SPS: 81 Average Reward: -10.619268702969244


 46% (225 of 488) |##########            | Elapsed Time: 1:34:04 ETA:   1:49:09


SPS: 81 Average Reward: -11.449799845141474


 46% (226 of 488) |##########            | Elapsed Time: 1:34:29 ETA:   1:49:12


SPS: 81 Average Reward: -10.662326953464225


 46% (227 of 488) |##########            | Elapsed Time: 1:34:54 ETA:   1:48:51


SPS: 81 Average Reward: -7.241396975808244


 46% (228 of 488) |##########            | Elapsed Time: 1:35:19 ETA:   1:47:50


SPS: 81 Average Reward: -8.296005568653303


 46% (229 of 488) |##########            | Elapsed Time: 1:35:43 ETA:   1:47:24


SPS: 81 Average Reward: -9.22419839555304


 47% (230 of 488) |##########            | Elapsed Time: 1:36:08 ETA:   1:45:59


SPS: 81 Average Reward: -6.501785072780444


 47% (231 of 488) |##########            | Elapsed Time: 1:36:33 ETA:   1:47:04


SPS: 81 Average Reward: -10.939766079704864


 47% (232 of 488) |##########            | Elapsed Time: 1:36:58 ETA:   1:45:51


SPS: 81 Average Reward: -9.402205727914373


 47% (233 of 488) |##########            | Elapsed Time: 1:37:23 ETA:   1:46:44


SPS: 81 Average Reward: -8.309690938721175


 47% (234 of 488) |##########            | Elapsed Time: 1:37:48 ETA:   1:45:54


SPS: 81 Average Reward: -8.533090141870098


 48% (235 of 488) |##########            | Elapsed Time: 1:38:13 ETA:   1:45:17


SPS: 81 Average Reward: -6.835967261673679


 48% (236 of 488) |##########            | Elapsed Time: 1:38:38 ETA:   1:44:26


SPS: 81 Average Reward: -8.671394868924416


 48% (237 of 488) |##########            | Elapsed Time: 1:39:03 ETA:   1:43:54


SPS: 81 Average Reward: -8.52365426998128


 48% (238 of 488) |##########            | Elapsed Time: 1:39:28 ETA:   1:44:14


SPS: 81 Average Reward: -8.048263991376064


 48% (239 of 488) |##########            | Elapsed Time: 1:39:53 ETA:   1:44:25


SPS: 81 Average Reward: -9.270123274340882


 49% (240 of 488) |##########            | Elapsed Time: 1:40:18 ETA:   1:43:20


SPS: 81 Average Reward: -8.713325722109081


 49% (241 of 488) |##########            | Elapsed Time: 1:40:43 ETA:   1:42:19


SPS: 81 Average Reward: -8.179518549448355


 49% (242 of 488) |##########            | Elapsed Time: 1:41:08 ETA:   1:42:37


SPS: 81 Average Reward: -12.022622758213636


 49% (243 of 488) |##########            | Elapsed Time: 1:41:33 ETA:   1:42:19


SPS: 81 Average Reward: -8.546761801750689


 50% (244 of 488) |###########           | Elapsed Time: 1:41:58 ETA:   1:41:42


SPS: 81 Average Reward: -8.36661620057175


 50% (245 of 488) |###########           | Elapsed Time: 1:42:23 ETA:   1:40:42


SPS: 81 Average Reward: -7.82020362271282


 50% (246 of 488) |###########           | Elapsed Time: 1:42:48 ETA:   1:41:07


SPS: 81 Average Reward: -8.68031422870453


 50% (247 of 488) |###########           | Elapsed Time: 1:43:13 ETA:   1:39:57


SPS: 81 Average Reward: -8.637096091045072


 50% (248 of 488) |###########           | Elapsed Time: 1:43:37 ETA:   1:38:02


SPS: 81 Average Reward: -7.990747574071107


 51% (249 of 488) |###########           | Elapsed Time: 1:44:02 ETA:   1:37:30


SPS: 81 Average Reward: -8.259178479073888


 51% (250 of 488) |###########           | Elapsed Time: 1:44:26 ETA:   1:37:38


SPS: 81 Average Reward: -8.057382262064891


 51% (251 of 488) |###########           | Elapsed Time: 1:44:51 ETA:   1:37:02


SPS: 81 Average Reward: -7.382086180443579


 51% (252 of 488) |###########           | Elapsed Time: 1:45:15 ETA:   1:36:22


SPS: 81 Average Reward: -8.556954202394612


 51% (253 of 488) |###########           | Elapsed Time: 1:45:40 ETA:   1:35:33


SPS: 81 Average Reward: -8.61150660782471


 52% (254 of 488) |###########           | Elapsed Time: 1:46:04 ETA:   1:35:18


SPS: 81 Average Reward: -7.507759319521351


 52% (255 of 488) |###########           | Elapsed Time: 1:46:29 ETA:   1:35:09


SPS: 81 Average Reward: -8.011048569126194


 52% (256 of 488) |###########           | Elapsed Time: 1:46:53 ETA:   1:34:37


SPS: 81 Average Reward: -8.021385392930354


 52% (257 of 488) |###########           | Elapsed Time: 1:47:18 ETA:   1:35:03


SPS: 81 Average Reward: -9.272707036529496


 52% (258 of 488) |###########           | Elapsed Time: 1:47:42 ETA:   1:33:44


SPS: 81 Average Reward: -8.653583178089209


 53% (259 of 488) |###########           | Elapsed Time: 1:48:07 ETA:   1:33:56


SPS: 81 Average Reward: -9.653566796842277


 53% (260 of 488) |###########           | Elapsed Time: 1:48:33 ETA:   1:40:12


SPS: 81 Average Reward: -8.430332025715567


 53% (261 of 488) |###########           | Elapsed Time: 1:49:00 ETA:   1:39:43


SPS: 81 Average Reward: -8.78349057098966


 53% (262 of 488) |###########           | Elapsed Time: 1:49:25 ETA:   1:35:40


SPS: 81 Average Reward: -9.01493050549171


 53% (263 of 488) |###########           | Elapsed Time: 1:49:50 ETA:   1:33:47


SPS: 81 Average Reward: -7.613648852654522


 54% (264 of 488) |###########           | Elapsed Time: 1:50:15 ETA:   1:33:21


SPS: 81 Average Reward: -7.8162926686791785


 54% (265 of 488) |###########           | Elapsed Time: 1:50:40 ETA:   1:32:48


SPS: 81 Average Reward: -9.288035937303729


 54% (266 of 488) |###########           | Elapsed Time: 1:51:05 ETA:   1:32:44


SPS: 81 Average Reward: -6.7572939053007515


 54% (267 of 488) |############          | Elapsed Time: 1:51:30 ETA:   1:32:17


SPS: 81 Average Reward: -9.097997087768174


 54% (268 of 488) |############          | Elapsed Time: 1:51:55 ETA:   1:32:08


SPS: 81 Average Reward: -10.676870022648979


 55% (269 of 488) |############          | Elapsed Time: 1:52:20 ETA:   1:30:54


SPS: 81 Average Reward: -7.863493057221771


 55% (270 of 488) |############          | Elapsed Time: 1:52:45 ETA:   1:30:42


SPS: 81 Average Reward: -7.392702222662644


 55% (271 of 488) |############          | Elapsed Time: 1:53:11 ETA:   1:32:22


SPS: 81 Average Reward: -8.20863367251983


 55% (272 of 488) |############          | Elapsed Time: 1:53:36 ETA:   1:31:23


SPS: 81 Average Reward: -8.016690541791062


 55% (273 of 488) |############          | Elapsed Time: 1:54:03 ETA:   1:37:39


SPS: 81 Average Reward: -7.895475221123061


 56% (274 of 488) |############          | Elapsed Time: 1:54:30 ETA:   1:35:33


SPS: 81 Average Reward: -5.28727328687448


 56% (275 of 488) |############          | Elapsed Time: 1:54:57 ETA:   1:35:19


SPS: 81 Average Reward: -7.775653366469252


 56% (276 of 488) |############          | Elapsed Time: 1:55:27 ETA:   1:47:21


SPS: 81 Average Reward: -7.198665846527751


 56% (277 of 488) |############          | Elapsed Time: 1:55:58 ETA:   1:49:15


SPS: 81 Average Reward: -8.757828609937485


 56% (278 of 488) |############          | Elapsed Time: 1:56:30 ETA:   1:50:10


SPS: 81 Average Reward: -8.60339383898605


 57% (279 of 488) |############          | Elapsed Time: 1:57:00 ETA:   1:45:40


SPS: 81 Average Reward: -7.063925429929915


 57% (280 of 488) |############          | Elapsed Time: 1:57:32 ETA:   1:48:34


SPS: 81 Average Reward: -7.306997580565658


 57% (281 of 488) |############          | Elapsed Time: 1:58:03 ETA:   1:49:08


SPS: 81 Average Reward: -6.675431207067841


 57% (282 of 488) |############          | Elapsed Time: 1:58:34 ETA:   1:47:22


SPS: 81 Average Reward: -7.708521313646746


 57% (283 of 488) |############          | Elapsed Time: 1:59:04 ETA:   1:41:10


SPS: 81 Average Reward: -8.380777145392786


 58% (284 of 488) |############          | Elapsed Time: 1:59:36 ETA:   1:50:05


SPS: 80 Average Reward: -7.7142018921933015


 58% (285 of 488) |############          | Elapsed Time: 2:00:08 ETA:   1:46:26


SPS: 80 Average Reward: -7.751280900322351


 58% (286 of 488) |############          | Elapsed Time: 2:00:39 ETA:   1:44:58


SPS: 80 Average Reward: -6.7741915141415205


 58% (287 of 488) |############          | Elapsed Time: 2:01:05 ETA:   1:28:29


SPS: 80 Average Reward: -7.256862548614247


 59% (288 of 488) |############          | Elapsed Time: 2:01:36 ETA:   1:41:37


SPS: 80 Average Reward: -7.576839787450205


 59% (289 of 488) |#############         | Elapsed Time: 2:02:06 ETA:   1:39:15


SPS: 80 Average Reward: -7.25570108690498


 59% (290 of 488) |#############         | Elapsed Time: 2:02:36 ETA:   1:40:15


SPS: 80 Average Reward: -7.553187829870736


 59% (291 of 488) |#############         | Elapsed Time: 2:03:07 ETA:   1:41:57


SPS: 80 Average Reward: -7.239657309105354


 59% (292 of 488) |#############         | Elapsed Time: 2:03:35 ETA:   1:29:36


SPS: 80 Average Reward: -7.368170568555964


 60% (293 of 488) |#############         | Elapsed Time: 2:04:02 ETA:   1:28:19


SPS: 80 Average Reward: -6.784728551223794


 60% (294 of 488) |#############         | Elapsed Time: 2:04:29 ETA:   1:28:21


SPS: 80 Average Reward: -7.3273815062748975


 60% (295 of 488) |#############         | Elapsed Time: 2:04:56 ETA:   1:25:54


SPS: 80 Average Reward: -6.971732332070348


 60% (296 of 488) |#############         | Elapsed Time: 2:05:22 ETA:   1:22:20


SPS: 80 Average Reward: -7.604883751079195


 60% (297 of 488) |#############         | Elapsed Time: 2:05:47 ETA:   1:20:38


SPS: 80 Average Reward: -6.663658422864284


 61% (298 of 488) |#############         | Elapsed Time: 2:06:13 ETA:   1:23:13


SPS: 80 Average Reward: -7.012658515189138


 61% (299 of 488) |#############         | Elapsed Time: 2:06:40 ETA:   1:25:04


SPS: 80 Average Reward: -7.1847979425742325


 61% (300 of 488) |#############         | Elapsed Time: 2:07:12 ETA:   1:37:56


SPS: 80 Average Reward: -7.052911074811015


 61% (301 of 488) |#############         | Elapsed Time: 2:07:39 ETA:   1:26:57


SPS: 80 Average Reward: -7.376019006318532


 61% (302 of 488) |#############         | Elapsed Time: 2:08:09 ETA:   1:31:45


SPS: 80 Average Reward: -9.861319607200054


 62% (303 of 488) |#############         | Elapsed Time: 2:08:37 ETA:   1:27:08


SPS: 80 Average Reward: -7.584579122191912


 62% (304 of 488) |#############         | Elapsed Time: 2:09:05 ETA:   1:24:28


SPS: 80 Average Reward: -7.492098796092352


 62% (305 of 488) |#############         | Elapsed Time: 2:09:31 ETA:   1:18:28


SPS: 80 Average Reward: -7.509821275881747


 62% (306 of 488) |#############         | Elapsed Time: 2:09:57 ETA:   1:20:45


SPS: 80 Average Reward: -6.677667996021956


 62% (307 of 488) |#############         | Elapsed Time: 2:10:24 ETA:   1:19:28


SPS: 80 Average Reward: -7.611712288113586


 63% (308 of 488) |#############         | Elapsed Time: 2:10:53 ETA:   1:27:06


SPS: 80 Average Reward: -6.925904210553053


 63% (309 of 488) |#############         | Elapsed Time: 2:11:22 ETA:   1:28:03


SPS: 80 Average Reward: -7.064628300423746


 63% (310 of 488) |#############         | Elapsed Time: 2:11:51 ETA:   1:24:44


SPS: 80 Average Reward: -7.168985575866686


 63% (311 of 488) |##############        | Elapsed Time: 2:12:20 ETA:   1:25:45


SPS: 80 Average Reward: -6.536346101855568


 63% (312 of 488) |##############        | Elapsed Time: 2:12:48 ETA:   1:22:17


SPS: 80 Average Reward: -6.931799010673007


 64% (313 of 488) |##############        | Elapsed Time: 2:13:18 ETA:   1:26:55


SPS: 80 Average Reward: -8.02822015546001


 64% (314 of 488) |##############        | Elapsed Time: 2:13:47 ETA:   1:25:48


SPS: 80 Average Reward: -7.31806070634171


 64% (315 of 488) |##############        | Elapsed Time: 2:14:15 ETA:   1:20:48


SPS: 80 Average Reward: -6.673606839964101


 64% (316 of 488) |##############        | Elapsed Time: 2:14:41 ETA:   1:13:11


SPS: 80 Average Reward: -7.8906630676282905


 64% (317 of 488) |##############        | Elapsed Time: 2:15:06 ETA:   1:12:55


SPS: 80 Average Reward: -7.221977540407271


 65% (318 of 488) |##############        | Elapsed Time: 2:15:32 ETA:   1:12:43


SPS: 80 Average Reward: -7.194369028602907


 65% (319 of 488) |##############        | Elapsed Time: 2:15:59 ETA:   1:16:43


SPS: 80 Average Reward: -7.031292247308819


 65% (320 of 488) |##############        | Elapsed Time: 2:16:30 ETA:   1:26:26


SPS: 80 Average Reward: -6.686451821283068


 65% (321 of 488) |##############        | Elapsed Time: 2:16:56 ETA:   1:12:12


SPS: 80 Average Reward: -7.887788731676035


 65% (322 of 488) |##############        | Elapsed Time: 2:17:21 ETA:   1:10:12


SPS: 80 Average Reward: -6.22304534865312


 66% (323 of 488) |##############        | Elapsed Time: 2:17:47 ETA:   1:10:22


SPS: 80 Average Reward: -8.20251885437458


 66% (324 of 488) |##############        | Elapsed Time: 2:18:12 ETA:   1:09:19


SPS: 80 Average Reward: -6.63034517227835


 66% (325 of 488) |##############        | Elapsed Time: 2:18:38 ETA:   1:09:19


SPS: 79 Average Reward: -6.541760206881805


 66% (326 of 488) |##############        | Elapsed Time: 2:19:08 ETA:   1:22:23


SPS: 79 Average Reward: -6.616845038432025


 67% (327 of 488) |##############        | Elapsed Time: 2:19:36 ETA:   1:13:38


SPS: 79 Average Reward: -7.259087125222428


 67% (328 of 488) |##############        | Elapsed Time: 2:20:05 ETA:   1:16:55


SPS: 79 Average Reward: -7.032676732400766


 67% (329 of 488) |##############        | Elapsed Time: 2:20:33 ETA:   1:13:37


SPS: 79 Average Reward: -6.7648893096041585


 67% (330 of 488) |##############        | Elapsed Time: 2:21:00 ETA:   1:13:18


SPS: 79 Average Reward: -7.023548649401114


 67% (331 of 488) |##############        | Elapsed Time: 2:21:28 ETA:   1:13:13


SPS: 79 Average Reward: -6.781181128223895


 68% (332 of 488) |##############        | Elapsed Time: 2:21:56 ETA:   1:10:58


SPS: 79 Average Reward: -6.691319662846832


 68% (333 of 488) |###############       | Elapsed Time: 2:22:22 ETA:   1:08:18


SPS: 79 Average Reward: -6.205960871882736


 68% (334 of 488) |###############       | Elapsed Time: 2:22:48 ETA:   1:07:20


SPS: 79 Average Reward: -6.346989449203773


 68% (335 of 488) |###############       | Elapsed Time: 2:23:15 ETA:   1:08:28


SPS: 79 Average Reward: -7.063210828649829


 68% (336 of 488) |###############       | Elapsed Time: 2:23:42 ETA:   1:08:46


SPS: 79 Average Reward: -7.058478827060715


 69% (337 of 488) |###############       | Elapsed Time: 2:24:09 ETA:   1:07:57


SPS: 79 Average Reward: -7.206381261531837


 69% (338 of 488) |###############       | Elapsed Time: 2:24:37 ETA:   1:08:42


SPS: 79 Average Reward: -7.037468522877365


 69% (339 of 488) |###############       | Elapsed Time: 2:25:06 ETA:   1:12:04


SPS: 79 Average Reward: -6.251368452696069


 69% (340 of 488) |###############       | Elapsed Time: 2:25:34 ETA:   1:10:17


SPS: 79 Average Reward: -7.975925942158981


 69% (341 of 488) |###############       | Elapsed Time: 2:26:05 ETA:   1:14:08


SPS: 79 Average Reward: -5.986974943013772


 70% (342 of 488) |###############       | Elapsed Time: 2:26:35 ETA:   1:13:03


SPS: 79 Average Reward: -7.699363130480667


 70% (343 of 488) |###############       | Elapsed Time: 2:27:04 ETA:   1:11:31


SPS: 79 Average Reward: -6.595009702055849


 70% (344 of 488) |###############       | Elapsed Time: 2:27:31 ETA:   1:04:02


SPS: 79 Average Reward: -7.01385323792928


 70% (345 of 488) |###############       | Elapsed Time: 2:28:00 ETA:   1:08:55


SPS: 79 Average Reward: -6.452022870670923


 70% (346 of 488) |###############       | Elapsed Time: 2:28:32 ETA:   1:15:05


SPS: 79 Average Reward: -6.576697920260868


 71% (347 of 488) |###############       | Elapsed Time: 2:28:59 ETA:   1:04:42


SPS: 79 Average Reward: -6.674441491948219


 71% (348 of 488) |###############       | Elapsed Time: 2:29:27 ETA:   1:04:09


SPS: 79 Average Reward: -6.325074948955675


 71% (349 of 488) |###############       | Elapsed Time: 2:29:54 ETA:   1:02:44


SPS: 79 Average Reward: -6.553872362537692


 71% (350 of 488) |###############       | Elapsed Time: 2:30:21 ETA:   1:03:30


SPS: 79 Average Reward: -7.108850398612037


 71% (351 of 488) |###############       | Elapsed Time: 2:30:47 ETA:   0:58:32


SPS: 79 Average Reward: -6.764710785078666


 72% (352 of 488) |###############       | Elapsed Time: 2:31:13 ETA:   0:58:15


SPS: 79 Average Reward: -6.5965542081404775


 72% (353 of 488) |###############       | Elapsed Time: 2:31:39 ETA:   0:59:24


SPS: 79 Average Reward: -6.688167956883168


 72% (354 of 488) |###############       | Elapsed Time: 2:32:06 ETA:   0:59:31


SPS: 79 Average Reward: -6.246359417198467


 72% (355 of 488) |################      | Elapsed Time: 2:32:32 ETA:   0:59:23


SPS: 79 Average Reward: -4.607700427983458


 72% (356 of 488) |################      | Elapsed Time: 2:32:59 ETA:   0:57:55


SPS: 79 Average Reward: -7.308346035139783


 73% (357 of 488) |################      | Elapsed Time: 2:33:25 ETA:   0:57:31


SPS: 79 Average Reward: -5.808391263577196


 73% (358 of 488) |################      | Elapsed Time: 2:33:51 ETA:   0:56:42


SPS: 79 Average Reward: -8.793226213297807


 73% (359 of 488) |################      | Elapsed Time: 2:34:18 ETA:   0:57:08


SPS: 79 Average Reward: -6.009487211125516


 73% (360 of 488) |################      | Elapsed Time: 2:34:44 ETA:   0:56:02


SPS: 79 Average Reward: -6.223435264829043


 73% (361 of 488) |################      | Elapsed Time: 2:35:10 ETA:   0:55:27


SPS: 79 Average Reward: -6.302289385798267


 74% (362 of 488) |################      | Elapsed Time: 2:35:36 ETA:   0:54:52


SPS: 79 Average Reward: -7.96943542950773


 74% (363 of 488) |################      | Elapsed Time: 2:36:03 ETA:   0:54:30


SPS: 79 Average Reward: -6.897313836655182


 74% (364 of 488) |################      | Elapsed Time: 2:36:29 ETA:   0:53:51


SPS: 79 Average Reward: -6.349000220098301


 74% (365 of 488) |################      | Elapsed Time: 2:36:55 ETA:   0:53:23


SPS: 79 Average Reward: -6.498005459374307


 75% (366 of 488) |################      | Elapsed Time: 2:37:21 ETA:   0:52:51


SPS: 79 Average Reward: -6.358798482945225


 75% (367 of 488) |################      | Elapsed Time: 2:37:48 ETA:   0:54:50


SPS: 79 Average Reward: -6.373837557651872


 75% (368 of 488) |################      | Elapsed Time: 2:38:14 ETA:   0:53:04


SPS: 79 Average Reward: -4.483241134267354


 75% (369 of 488) |################      | Elapsed Time: 2:38:41 ETA:   0:52:21


SPS: 79 Average Reward: -5.670510356401244


 75% (370 of 488) |################      | Elapsed Time: 2:39:07 ETA:   0:51:22


SPS: 79 Average Reward: -6.125032675417029


 76% (371 of 488) |################      | Elapsed Time: 2:39:33 ETA:   0:51:33


SPS: 79 Average Reward: -6.289266383929137


 76% (372 of 488) |################      | Elapsed Time: 2:40:00 ETA:   0:50:38


SPS: 79 Average Reward: -6.401228821124611


 76% (373 of 488) |################      | Elapsed Time: 2:40:26 ETA:   0:50:31


SPS: 79 Average Reward: -6.23158883283551


 76% (374 of 488) |################      | Elapsed Time: 2:40:52 ETA:   0:49:29


SPS: 79 Average Reward: -6.939438779427646


 76% (375 of 488) |################      | Elapsed Time: 2:41:18 ETA:   0:48:30


SPS: 79 Average Reward: -6.601326318558466


 77% (376 of 488) |################      | Elapsed Time: 2:41:44 ETA:   0:48:55


SPS: 79 Average Reward: -6.162510920844313


 77% (377 of 488) |################      | Elapsed Time: 2:42:12 ETA:   0:51:06


SPS: 79 Average Reward: -6.976172859789063


 77% (378 of 488) |#################     | Elapsed Time: 2:42:38 ETA:   0:47:51


SPS: 79 Average Reward: -5.563055385709624


 77% (379 of 488) |#################     | Elapsed Time: 2:43:04 ETA:   0:48:06


SPS: 79 Average Reward: -6.579078444179672


 77% (380 of 488) |#################     | Elapsed Time: 2:43:30 ETA:   0:47:13


SPS: 79 Average Reward: -6.064053210008717


 78% (381 of 488) |#################     | Elapsed Time: 2:43:57 ETA:   0:46:36


SPS: 79 Average Reward: -6.914175643719061


 78% (382 of 488) |#################     | Elapsed Time: 2:44:23 ETA:   0:45:50


SPS: 79 Average Reward: -6.489702279075612


 78% (383 of 488) |#################     | Elapsed Time: 2:44:49 ETA:   0:46:26


SPS: 79 Average Reward: -5.744281880557263


 78% (384 of 488) |#################     | Elapsed Time: 2:45:16 ETA:   0:46:00


SPS: 79 Average Reward: -5.816433412668751


 78% (385 of 488) |#################     | Elapsed Time: 2:45:41 ETA:   0:43:54


SPS: 79 Average Reward: -6.148166612682571


 79% (386 of 488) |#################     | Elapsed Time: 2:46:07 ETA:   0:43:46


SPS: 79 Average Reward: -8.070940305543077


 79% (387 of 488) |#################     | Elapsed Time: 2:46:34 ETA:   0:46:01


SPS: 79 Average Reward: -6.41336985852309


 79% (388 of 488) |#################     | Elapsed Time: 2:47:02 ETA:   0:46:05


SPS: 79 Average Reward: -6.125020191431578


 79% (389 of 488) |#################     | Elapsed Time: 2:47:28 ETA:   0:43:23


SPS: 79 Average Reward: -6.347321245698887


 79% (390 of 488) |#################     | Elapsed Time: 2:47:55 ETA:   0:43:29


SPS: 79 Average Reward: -5.683878218589449


 80% (391 of 488) |#################     | Elapsed Time: 2:48:22 ETA:   0:43:23


SPS: 79 Average Reward: -6.235150775411751


 80% (392 of 488) |#################     | Elapsed Time: 2:48:48 ETA:   0:42:47


SPS: 79 Average Reward: -5.917022583933819


 80% (393 of 488) |#################     | Elapsed Time: 2:49:15 ETA:   0:41:26


SPS: 79 Average Reward: -5.9428762493458365


 80% (394 of 488) |#################     | Elapsed Time: 2:49:40 ETA:   0:40:11


SPS: 79 Average Reward: -6.221027760283997


 80% (395 of 488) |#################     | Elapsed Time: 2:50:07 ETA:   0:40:54


SPS: 79 Average Reward: -4.984749748908119


 81% (396 of 488) |#################     | Elapsed Time: 2:50:33 ETA:   0:39:49


SPS: 79 Average Reward: -6.110382357432633


 81% (397 of 488) |#################     | Elapsed Time: 2:50:58 ETA:   0:39:12


SPS: 79 Average Reward: -5.3864587680560625


 81% (398 of 488) |#################     | Elapsed Time: 2:51:25 ETA:   0:39:02


SPS: 79 Average Reward: -6.3008103943054845


 81% (399 of 488) |#################     | Elapsed Time: 2:51:50 ETA:   0:37:20


SPS: 79 Average Reward: -7.39350773018565


 81% (400 of 488) |##################    | Elapsed Time: 2:52:14 ETA:   0:36:00


SPS: 79 Average Reward: -6.773593231005891


 82% (401 of 488) |##################    | Elapsed Time: 2:52:39 ETA:   0:35:38


SPS: 79 Average Reward: -5.855597285348773


 82% (402 of 488) |##################    | Elapsed Time: 2:53:03 ETA:   0:35:04


SPS: 79 Average Reward: -6.260192740137036


 82% (403 of 488) |##################    | Elapsed Time: 2:53:28 ETA:   0:34:54


SPS: 79 Average Reward: -5.561554018171522


 82% (404 of 488) |##################    | Elapsed Time: 2:53:53 ETA:   0:34:34


SPS: 79 Average Reward: -8.43961131068419


 82% (405 of 488) |##################    | Elapsed Time: 2:54:17 ETA:   0:33:53


SPS: 79 Average Reward: -5.424821663986793


 83% (406 of 488) |##################    | Elapsed Time: 2:54:42 ETA:   0:33:48


SPS: 79 Average Reward: -6.834771119260453


 83% (407 of 488) |##################    | Elapsed Time: 2:55:07 ETA:   0:33:18


SPS: 79 Average Reward: -5.9618172558492475


 83% (408 of 488) |##################    | Elapsed Time: 2:55:31 ETA:   0:32:30


SPS: 79 Average Reward: -6.592575237555419


 83% (409 of 488) |##################    | Elapsed Time: 2:55:55 ETA:   0:32:05


SPS: 79 Average Reward: -5.570590460417624


 84% (410 of 488) |##################    | Elapsed Time: 2:56:20 ETA:   0:31:38


SPS: 79 Average Reward: -8.0104352845995


 84% (411 of 488) |##################    | Elapsed Time: 2:56:44 ETA:   0:31:50


SPS: 79 Average Reward: -6.649744005530981


 84% (412 of 488) |##################    | Elapsed Time: 2:57:09 ETA:   0:31:24


SPS: 79 Average Reward: -5.810235887354148


 84% (413 of 488) |##################    | Elapsed Time: 2:57:34 ETA:   0:30:30


SPS: 79 Average Reward: -6.73520141388478


 84% (414 of 488) |##################    | Elapsed Time: 2:57:58 ETA:   0:30:20


SPS: 79 Average Reward: -5.693495263579117


 85% (415 of 488) |##################    | Elapsed Time: 2:58:23 ETA:   0:30:01


SPS: 79 Average Reward: -6.634524068481318


 85% (416 of 488) |##################    | Elapsed Time: 2:58:48 ETA:   0:29:34


SPS: 79 Average Reward: -5.191748239632661


 85% (417 of 488) |##################    | Elapsed Time: 2:59:12 ETA:   0:28:53


SPS: 79 Average Reward: -5.794008078738425


 85% (418 of 488) |##################    | Elapsed Time: 2:59:37 ETA:   0:28:42


SPS: 79 Average Reward: -5.9282886343418175


 85% (419 of 488) |##################    | Elapsed Time: 3:00:01 ETA:   0:28:11


SPS: 79 Average Reward: -6.494327391033606


 86% (420 of 488) |##################    | Elapsed Time: 3:00:26 ETA:   0:27:53


SPS: 79 Average Reward: -6.281006777948236


 86% (421 of 488) |##################    | Elapsed Time: 3:00:50 ETA:   0:27:29


SPS: 79 Average Reward: -5.855831881383112


 86% (422 of 488) |###################   | Elapsed Time: 3:01:15 ETA:   0:27:15


SPS: 79 Average Reward: -6.17496478116321


 86% (423 of 488) |###################   | Elapsed Time: 3:01:40 ETA:   0:26:43


SPS: 79 Average Reward: -6.391978141912364


 86% (424 of 488) |###################   | Elapsed Time: 3:02:04 ETA:   0:26:11


SPS: 79 Average Reward: -4.871630822027277


 87% (425 of 488) |###################   | Elapsed Time: 3:02:29 ETA:   0:25:38


SPS: 79 Average Reward: -6.7676106599829335


 87% (426 of 488) |###################   | Elapsed Time: 3:02:53 ETA:   0:25:22


SPS: 79 Average Reward: -6.404536869343452


 87% (427 of 488) |###################   | Elapsed Time: 3:03:18 ETA:   0:25:02


SPS: 79 Average Reward: -6.27774819238607


 87% (428 of 488) |###################   | Elapsed Time: 3:03:43 ETA:   0:24:34


SPS: 79 Average Reward: -5.828977234910374


 87% (429 of 488) |###################   | Elapsed Time: 3:04:07 ETA:   0:24:06


SPS: 79 Average Reward: -6.102573014244138


 88% (430 of 488) |###################   | Elapsed Time: 3:04:32 ETA:   0:23:42


SPS: 79 Average Reward: -5.985301156623192


 88% (431 of 488) |###################   | Elapsed Time: 3:04:56 ETA:   0:23:10


SPS: 79 Average Reward: -6.337937620711821


 88% (432 of 488) |###################   | Elapsed Time: 3:05:20 ETA:   0:22:49


SPS: 79 Average Reward: -5.835121645663985


 88% (433 of 488) |###################   | Elapsed Time: 3:05:45 ETA:   0:22:27


SPS: 79 Average Reward: -6.2454352060149105


 88% (434 of 488) |###################   | Elapsed Time: 3:06:09 ETA:   0:21:56


SPS: 79 Average Reward: -6.8709799106923155


 89% (435 of 488) |###################   | Elapsed Time: 3:06:34 ETA:   0:21:37


SPS: 79 Average Reward: -5.985820385459563


 89% (436 of 488) |###################   | Elapsed Time: 3:06:58 ETA:   0:21:10


SPS: 79 Average Reward: -6.170702127173934


 89% (437 of 488) |###################   | Elapsed Time: 3:07:23 ETA:   0:20:45


SPS: 79 Average Reward: -6.707021851123377


 89% (438 of 488) |###################   | Elapsed Time: 3:07:50 ETA:   0:22:41


SPS: 79 Average Reward: -5.868767528383828


 89% (439 of 488) |###################   | Elapsed Time: 3:08:19 ETA:   0:23:23


SPS: 79 Average Reward: -6.381443660423972


 90% (440 of 488) |###################   | Elapsed Time: 3:08:49 ETA:   0:24:35


SPS: 79 Average Reward: -6.353237156341375


 90% (441 of 488) |###################   | Elapsed Time: 3:09:16 ETA:   0:21:07


SPS: 79 Average Reward: -5.284013594397724


 90% (442 of 488) |###################   | Elapsed Time: 3:09:42 ETA:   0:19:41


SPS: 79 Average Reward: -5.499277969175691


 90% (443 of 488) |###################   | Elapsed Time: 3:10:07 ETA:   0:18:57


SPS: 79 Average Reward: -6.019618042693104


 90% (444 of 488) |####################  | Elapsed Time: 3:10:33 ETA:   0:18:45


SPS: 79 Average Reward: -6.465685860637925


 91% (445 of 488) |####################  | Elapsed Time: 3:10:58 ETA:   0:18:15


SPS: 79 Average Reward: -6.384383447893982


 91% (446 of 488) |####################  | Elapsed Time: 3:11:24 ETA:   0:17:42


SPS: 79 Average Reward: -6.403292332661736


 91% (447 of 488) |####################  | Elapsed Time: 3:11:49 ETA:   0:17:13


SPS: 79 Average Reward: -6.323647104011281


 91% (448 of 488) |####################  | Elapsed Time: 3:12:14 ETA:   0:16:39


SPS: 79 Average Reward: -5.518634614828147


 92% (449 of 488) |####################  | Elapsed Time: 3:12:39 ETA:   0:16:23


SPS: 79 Average Reward: -5.642365001496192


 92% (450 of 488) |####################  | Elapsed Time: 3:13:05 ETA:   0:16:26


SPS: 79 Average Reward: -5.746148305362948


 92% (451 of 488) |####################  | Elapsed Time: 3:13:31 ETA:   0:15:50


SPS: 79 Average Reward: -6.25738166970035


 92% (452 of 488) |####################  | Elapsed Time: 3:13:57 ETA:   0:15:53


SPS: 79 Average Reward: -6.863586100003262


 92% (453 of 488) |####################  | Elapsed Time: 3:14:24 ETA:   0:15:27


SPS: 79 Average Reward: -6.239107687436265


 93% (454 of 488) |####################  | Elapsed Time: 3:14:49 ETA:   0:14:29


SPS: 79 Average Reward: -6.182221548588061


 93% (455 of 488) |####################  | Elapsed Time: 3:15:15 ETA:   0:14:09


SPS: 79 Average Reward: -5.920366056791171


 93% (456 of 488) |####################  | Elapsed Time: 3:15:41 ETA:   0:13:50


SPS: 79 Average Reward: -6.192457650913481


 93% (457 of 488) |####################  | Elapsed Time: 3:16:06 ETA:   0:13:10


SPS: 79 Average Reward: -6.094104770534354


 93% (458 of 488) |####################  | Elapsed Time: 3:16:34 ETA:   0:13:47


SPS: 79 Average Reward: -5.1652291173412666


 94% (459 of 488) |####################  | Elapsed Time: 3:17:01 ETA:   0:12:51


SPS: 79 Average Reward: -6.508958517007764


 94% (460 of 488) |####################  | Elapsed Time: 3:17:27 ETA:   0:12:21


SPS: 79 Average Reward: -5.719719095329275


 94% (461 of 488) |####################  | Elapsed Time: 3:17:54 ETA:   0:12:12


SPS: 79 Average Reward: -5.852381720056047


 94% (462 of 488) |####################  | Elapsed Time: 3:18:20 ETA:   0:11:21


SPS: 79 Average Reward: -5.5942839416871175


 94% (463 of 488) |####################  | Elapsed Time: 3:18:47 ETA:   0:11:11


SPS: 79 Average Reward: -6.184588975418336


 95% (464 of 488) |####################  | Elapsed Time: 3:19:13 ETA:   0:10:20


SPS: 79 Average Reward: -5.832540948036906


 95% (465 of 488) |####################  | Elapsed Time: 3:19:41 ETA:   0:10:46


SPS: 79 Average Reward: -6.0233163035812805


 95% (466 of 488) |##################### | Elapsed Time: 3:20:08 ETA:   0:09:58


SPS: 79 Average Reward: -6.335728804768392


 95% (467 of 488) |##################### | Elapsed Time: 3:20:35 ETA:   0:09:19


SPS: 79 Average Reward: -6.0495708226779215


 95% (468 of 488) |##################### | Elapsed Time: 3:21:03 ETA:   0:09:24


SPS: 79 Average Reward: -6.100532816215209


 96% (469 of 488) |##################### | Elapsed Time: 3:21:29 ETA:   0:08:11


SPS: 79 Average Reward: -6.27826165120222


 96% (470 of 488) |##################### | Elapsed Time: 3:21:55 ETA:   0:07:37


SPS: 79 Average Reward: -5.881495148318199


 96% (471 of 488) |##################### | Elapsed Time: 3:22:20 ETA:   0:07:05


SPS: 79 Average Reward: -5.76903123865247


 96% (472 of 488) |##################### | Elapsed Time: 3:22:44 ETA:   0:06:35


SPS: 79 Average Reward: -5.90190198787751


 96% (473 of 488) |##################### | Elapsed Time: 3:23:14 ETA:   0:07:31


SPS: 79 Average Reward: -5.904979956914093


 97% (474 of 488) |##################### | Elapsed Time: 3:23:45 ETA:   0:07:06


SPS: 79 Average Reward: -5.633161835444236


 97% (475 of 488) |##################### | Elapsed Time: 3:24:16 ETA:   0:06:47


SPS: 79 Average Reward: -6.013968407198345


 97% (476 of 488) |##################### | Elapsed Time: 3:24:47 ETA:   0:06:14


SPS: 79 Average Reward: -6.104715834015341


 97% (477 of 488) |##################### | Elapsed Time: 3:25:18 ETA:   0:05:34


SPS: 79 Average Reward: -5.895402888652134


 97% (478 of 488) |##################### | Elapsed Time: 3:25:47 ETA:   0:04:55


SPS: 79 Average Reward: -6.850759630429255


 98% (479 of 488) |##################### | Elapsed Time: 3:26:17 ETA:   0:04:23


SPS: 79 Average Reward: -6.451214222751775


 98% (480 of 488) |##################### | Elapsed Time: 3:26:46 ETA:   0:03:56


SPS: 79 Average Reward: -6.02283574092978


 98% (481 of 488) |##################### | Elapsed Time: 3:27:16 ETA:   0:03:27


SPS: 79 Average Reward: -6.132948894808731


 98% (482 of 488) |##################### | Elapsed Time: 3:27:46 ETA:   0:03:02


SPS: 79 Average Reward: -6.345383583578386


 98% (483 of 488) |##################### | Elapsed Time: 3:28:17 ETA:   0:02:31


SPS: 79 Average Reward: -5.794877282078398


 99% (484 of 488) |##################### | Elapsed Time: 3:28:47 ETA:   0:02:01


SPS: 79 Average Reward: -6.272782411815232


 99% (485 of 488) |##################### | Elapsed Time: 3:29:18 ETA:   0:01:31


SPS: 79 Average Reward: -5.684101138313562


 99% (486 of 488) |##################### | Elapsed Time: 3:29:48 ETA:   0:01:01


SPS: 79 Average Reward: -5.872491653256822


 99% (487 of 488) |##################### | Elapsed Time: 3:30:20 ETA:   0:00:31


SPS: 79 Average Reward: -5.96250430684163


100% (488 of 488) |######################| Elapsed Time: 3:30:50 Time:  3:30:50


In [18]:
torch.save(sender, "models/sender" + str(start_time) + ".pth")
torch.save(receiver, "models/receiver" + str(start_time) + ".pth")

# Direct communication test

In [2]:
class Test_Communication:
    def __init__(self, environment):
        self.environment = environment
        self.observation_space = {"low": [0, 0, 0, 0], "high": [1, 1, 1, 1]}
        self.action_space = {"low": [0, 0, 0, 0], "high": [1, 1, 1, 1]}

    def dynamic(self, agent, actions):
        if "utterance" not in self.environment.data_store[agent].keys():
            self.environment.data_store[agent]["utterance"] = None
        if agent == "receiver":
            utterance = [0, 0, 0, 0]
            if "utterance_max" in self.environment.data_store["sender"].keys():
                observation = self.environment.data_store["sender"]["utterance_max"]
            else:
                observation = utterance
        elif agent == "sender":
            utterance = [0, 0, 0, 0]
            utterance[np.argmax(actions)] = 1
            self.environment.data_store[agent]["utterance"] = actions
            self.environment.data_store[agent]["utterance_max"] = utterance
            observation = [0, 0, 0, 0]
        else:
            print("Dafaq is going on here?")
        return 0, observation, False, {}

In [3]:
xml_files = ["levels/" + file for file in os.listdir("levels/")]
agents = ["sender", "receiver"]

config_dict = {"xmlPath":xml_files, 
                   "agents":agents, 
                   "rewardFunctions":[collision_reward, target_reward], 
                   "doneFunctions":[target_done, border_done], 
                   "skipFrames":5,
                   "environmentDynamics":[Image, Reward, Test_Communication, Accuracy],
                   "freeJoint":True,
                   "renderMode":False,
                   "maxSteps":1024,
                   "agentCameras":True}

env = make_env(config_dict)()

NameError: name 'make_env' is not defined

In [21]:
epochs = 30
num_steps = 1024
lengths = []

for epoch in range(epochs):
    next_obs, infos = reset_environment(env, device)

    next_obs = {k: torch.Tensor(v).unsqueeze(0).to(device) for k, v in next_obs.items()}
    next_done = {"sender": torch.zeros(num_envs).to(device), "receiver": torch.zeros(num_envs).to(device)}

    for step in range(0, num_steps):
        sender_action = sender.get_action_and_value(next_obs["sender"])[0]
        receiver_action = receiver.get_action_and_value(next_obs["receiver"])[0]

        next_obs, reward, terminations, truncations, info = env.step({"sender": sender_action.cpu().numpy()[0], "receiver": receiver_action.cpu().numpy()[0]})
        next_obs = {"sender": torch.Tensor(next_obs["sender"]).unsqueeze(0).to(device), "receiver": torch.Tensor(next_obs["receiver"]).unsqueeze(0).to(device)}

        if terminations["sender"] or terminations["receiver"] or truncations["sender"] or truncations["receiver"]:
            next_obs, infos = reset_environment(env, device)
            lengths.append(step)
            break

In [25]:
dynamic = env.env.env.environment_dynamics[3]
print("Accuracy:", sum(dynamic.accuracies) / len(dynamic.accuracies))
print("Variance:", sum(dynamic.variances) / len(dynamic.variances))
print("Send Accuracy:", sum(dynamic.sendAccuracies) / len(dynamic.sendAccuracies))
print("Length:", sum(lengths) / len(lengths))