In [None]:
import os
import random
import gym
import pylab
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Lambda, Add, Conv2D, Flatten, LSTM, Reshape
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import backend as K
import cv2
import threading
from threading import Thread, Lock
import time
import tensorflow_probability as tfp
from typing import Any, List, Sequence, Tuple

# Cap TensorFlow's memory use on the first GPU so the process does not claim
# the whole card. Skipped when no GPU is visible, so the file also runs on
# CPU-only machines instead of failing with IndexError on gpus[0].
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6000)])
    except RuntimeError as err:
        # Virtual devices can only be configured before the GPU has been
        # initialised (e.g. this cell was re-run in a live kernel).
        print(err)

#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Short alias for the TensorFlow Probability distributions module.
tfd = tfp.distributions


class OurModel(tf.keras.Model):
    """Convolutional + LSTM network with a policy head and a value head.

    A stack of three convolutions is reshaped into a sequence, run through an
    LSTM whose initial state is supplied by the caller, and the flattened LSTM
    output feeds a shared dense layer followed by two linear heads.
    """

    def __init__(self, input_shape, action_space):
        """Create the layers.

        Args:
            input_shape: accepted for interface compatibility; it is not used
                when building the layers.
            action_space: number of units of the policy (logit) head.
        """
        super(OurModel, self).__init__()

        # Layers are created once here so Keras tracks them; the assignment
        # order of the weighted layers is kept as it was so that the order of
        # the model's weights does not change.
        self.flatten = Flatten()
        self.conv_1 = Conv2D(8, 8, 4, padding="valid", activation="relu")
        self.conv_2 = Conv2D(16, 4, 2, padding="valid", activation="relu")
        self.conv_3 = Conv2D(16, 3, 1, padding="valid", activation="relu")
        self.lstm = LSTM(64, return_sequences=True, return_state=True)
        self.dense_1 = Dense(action_space)
        self.dense_2 = Dense(1)
        self.dense_3 = Dense(256, activation='relu')
        # The conv stack must yield 36 positions x 16 channels per sample
        # (a 6x6 map, which is what an 80x80 input produces with the
        # kernel sizes and strides above).
        self.reshape_seq = Reshape((36, 16))

    def call(self, X_input, memory_state, carry_state):
        """Run one forward pass.

        Args:
            X_input: batch of images fed to the first convolution.
            memory_state: first element of the LSTM initial state.
            carry_state: second element of the LSTM initial state.

        Returns:
            A tuple ``(action_logit, value, final_memory_state,
            final_carry_state)``: the un-normalised policy logits, the value
            estimate, and the two LSTM states returned by the LSTM layer.
        """
        features = self.conv_1(X_input)
        features = self.conv_2(features)
        features = self.conv_3(features)
        # Treat the spatial positions of the feature map as LSTM time steps.
        sequence = self.reshape_seq(features)

        initial_state = (memory_state, carry_state)
        LSTM_output, final_memory_state, final_carry_state = self.lstm(
            sequence, initial_state=initial_state)

        hidden = self.dense_3(self.flatten(LSTM_output))

        action_logit = self.dense_1(hidden)
        value = self.dense_2(hidden)

        return action_logit, value, final_memory_state, final_carry_state


def safe_log(x):
  """Element-wise logarithm that yields 0 wherever x equals zero.

  Non-zero entries are clamped from below at 1e-12 before the logarithm
  is taken.
  """
  is_zero = tf.math.equal(x, 0)
  clamped_log = tf.math.log(tf.math.maximum(1e-12, x))
  return tf.where(is_zero, tf.zeros_like(x), clamped_log)


def take_vector_elements(vectors, indices):
    """
    Pick one component from every row of a batch of vectors.
    Args:
      vectors: a [batch x dims] Tensor.
      indices: an int32 Tensor with `batch` entries; entry i is the
        component to take from row i.
    Returns:
      A Tensor with `batch` entries, one for each vector.
    """
    row_ids = tf.range(tf.shape(vectors)[0])
    # Pair every row number with its requested column: [[0, i0], [1, i1], ...]
    coordinates = tf.stack([row_ids, indices], axis=1)
    return tf.gather_nd(vectors, coordinates)


# Shared Keras loss objects. The Huber and sparse cross-entropy losses sum
# over the batch; the mean-squared-error loss keeps the Keras default reduction.
huber_loss = tf.keras.losses.Huber(
    reduction=tf.keras.losses.Reduction.SUM,
)
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
    reduction=tf.keras.losses.Reduction.SUM,
)
mse_loss = tf.keras.losses.MeanSquaredError()


class A3CAgent:
    """Recurrent actor-critic agent trained from whole-episode rollouts.

    A single shared ``OurModel`` network is used by every worker thread. Each
    worker plays one full episode in its own gym environment and then, while
    holding ``self.lock``, applies one gradient step computed by ``replay``.
    """

    def __init__(self, env_name):
        """Create the environment, the network and the optimizer.

        Args:
            env_name: gym environment id passed to ``gym.make``.
        """
        # Environment and training parameters
        self.env_name = env_name
        self.env = gym.make(env_name)
        self.action_size = self.env.action_space.n
        self.EPISODES, self.episode, self.max_average = 2000000, 0, -21.0 # specific for pong
        self.lock = Lock()
        # NOTE(review): self.lr only ends up in the file names built below;
        # the optimizer is created with self.learning_rate.
        self.lr = 0.000025

        self.ROWS = 80
        self.COLS = 80
        self.REM_STEP = 4

        # Instantiate plot memory
        self.scores, self.episodes, self.average = [], [], []

        self.Save_Path = 'Models'
        # NOTE(review): passed to OurModel as input_shape, which does not use it.
        self.state_size = (self.REM_STEP, self.ROWS, self.COLS)

        os.makedirs(self.Save_Path, exist_ok=True)
        self.path = '{}_A3C_{}'.format(self.env_name, self.lr)
        self.model_name = os.path.join(self.Save_Path, self.path)

        # Create Actor-Critic network model
        self.ActorCritic = OurModel(input_shape=self.state_size, action_space=self.action_size)

        self.learning_rate = 0.0001
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)

    def act(self, state, memory_state, carry_state):
        """Sample an action from the policy for one batched observation.

        Args:
            state: observation batch fed to the network.
            memory_state: LSTM memory state to start from.
            carry_state: LSTM carry state to start from.

        Returns:
            ``(action, memory_state, carry_state)`` where ``action`` is the
            sampled action index and the two states are the LSTM states
            returned by the network, as numpy arrays.
        """
        memory_state = tf.constant(memory_state, tf.float32)
        carry_state = tf.constant(carry_state, tf.float32)

        prediction = self.ActorCritic(state, memory_state, carry_state, training=False)
        # prediction[0] holds the logits; draw one sample per batch row.
        action = tf.random.categorical(prediction[0], 1).numpy()

        memory_state = prediction[2].numpy()
        carry_state = prediction[3].numpy()

        return action[0][0], memory_state, carry_state

    def discount_rewards(self, reward):
        """Return the normalised gamma-discounted returns of an episode.

        The running return is reset at every non-zero reward (a point was
        scored, which is a game boundary in Pong). The result has its mean
        subtracted and is divided by its standard deviation; the division is
        skipped when the standard deviation is zero so no NaN/inf is produced.

        Args:
            reward: sequence of per-step rewards.

        Returns:
            A float64 numpy array with one entry per step.
        """
        gamma = 0.99    # discount rate
        running_add = 0.0
        # Force a float dtype so integer rewards are not truncated.
        discounted_r = np.zeros_like(reward, dtype=np.float64)
        if discounted_r.size == 0:
            return discounted_r

        for i in reversed(range(len(reward))):
            if reward[i] != 0: # reset the sum, since this was a game boundary (pong specific!)
                running_add = 0.0

            running_add = running_add * gamma + reward[i]
            discounted_r[i] = running_add

        discounted_r -= np.mean(discounted_r)
        std = np.std(discounted_r)
        if std > 0:
            discounted_r /= std

        return discounted_r

    def replay(self, states, actions, rewards, memory_states, carry_states):
        """Apply one gradient step computed from a finished episode.

        The episode is re-played through the network step by step, starting
        from the first recorded LSTM state and carrying the state forward, so
        gradients flow through the whole sequence.

        Args:
            states: list of batched observations, one per step.
            actions: list of the action indices that were taken.
            rewards: list of per-step rewards.
            memory_states: LSTM memory states recorded during the rollout;
                only the first entry is used, as the initial state.
            carry_states: LSTM carry states recorded during the rollout;
                only the first entry is used, as the initial state.
        """
        if len(states) == 0:
            return

        # reshape memory to appropriate shape for training
        states = np.vstack(states)
        memory_states = np.vstack(memory_states)
        carry_states = np.vstack(carry_states)
        actions = np.asarray(actions, dtype=np.int32)

        batch_size = states.shape[0]

        # Normalised discounted returns, used as critic target and as the
        # baseline-corrected signal for the actor.
        returns = tf.constant(self.discount_rewards(rewards).astype(np.float32))

        with tf.GradientTape() as tape:
            step_logits, step_values = [], []

            memory_state = tf.constant(memory_states[:1], tf.float32)
            carry_state = tf.constant(carry_states[:1], tf.float32)
            for i in range(batch_size):
                prediction = self.ActorCritic(tf.expand_dims(states[i], 0),
                                              memory_state, carry_state, training=True)

                step_logits.append(prediction[0][0])
                step_values.append(prediction[1][0])

                memory_state = prediction[2]
                carry_state = prediction[3]

            action_logits = tf.stack(step_logits)   # [steps, actions]
            values = tf.stack(step_values)          # [steps, 1]

            # The advantage is a constant weight for the policy gradient; the
            # critic is trained only through critic_loss.
            advantages = tf.stop_gradient(returns - values[:, 0])

            # log pi(a|s) computed directly from logits, which avoids the
            # log(0) that log(softmax(logits)) can produce.
            dist = tfd.Categorical(logits=action_logits)
            action_log_prob = dist.log_prob(actions)

            actor_loss = -tf.math.reduce_mean(action_log_prob * advantages)
            critic_loss = mse_loss(returns[:, tf.newaxis], values)
            total_loss = actor_loss + critic_loss

        grads = tape.gradient(total_loss, self.ActorCritic.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.ActorCritic.trainable_variables))

    def load(self, model_name):
        """Replace the network with the model stored at ``model_name``."""
        self.ActorCritic = load_model(model_name, compile=False)

    def save(self):
        """Save the network to ``self.model_name``."""
        self.ActorCritic.save(self.model_name)

    # Runs once, when the class body is executed: creates the figure that
    # PlotModel draws on.
    pylab.figure(figsize=(18, 9))
    def PlotModel(self, score, episode):
        """Record an episode score and periodically save the score plot.

        Args:
            score: total reward of the finished episode.
            episode: index of the finished episode.

        Returns:
            The mean of the last (up to) 50 recorded scores.
        """
        self.scores.append(score)
        self.episodes.append(episode)
        recent = self.scores[-50:]
        self.average.append(sum(recent) / len(recent))
        # True when the decimal form of the episode number ends in "00"
        # (100, 200, ...); episode 0 is "0" and therefore does not match.
        if str(episode)[-2:] == "00":
            pylab.plot(self.episodes, self.scores, 'b')
            pylab.plot(self.episodes, self.average, 'r')
            pylab.ylabel('Score', fontsize=18)
            pylab.xlabel('Steps', fontsize=18)
            try:
                pylab.savefig(self.path + ".png")
            except OSError:
                # Plotting is best effort; a failed write must not stop training.
                pass

        return self.average[-1]

    @staticmethod
    def _preprocess(frame):
        """Crop rows 35..194, keep every second row/column, scale to [0, 1]."""
        return frame[35:195:2, ::2, :] / 255.0

    def reset(self, env):
        """Reset ``env`` and return the preprocessed first frame."""
        return self._preprocess(env.reset())

    def step(self, action, env):
        """Advance ``env`` by one action.

        Returns:
            ``(next_state, reward, done, info)`` as returned by ``env.step``,
            with ``next_state`` preprocessed.
        """
        next_state, reward, done, info = env.step(action)
        return self._preprocess(next_state), reward, done, info

    def train(self, n_threads):
        """Train with ``n_threads`` worker threads and wait for them to end."""
        self.env.close()
        # Instantiate one environment per thread
        envs = [gym.make(self.env_name) for i in range(n_threads)]

        # Create threads
        threads = [threading.Thread(
                target=self.train_threading,
                daemon=True,
                args=(self, envs[i], i)) for i in range(n_threads)]

        for t in threads:
            time.sleep(2)
            t.start()

        for t in threads:
            time.sleep(10)
            t.join()

    def train_threading(self, agent, env, thread):
        """Worker loop: play episodes in ``env`` and train after each one.

        Args:
            agent: agent used to pick actions (``train`` passes ``self``).
            env: the gym environment owned by this worker.
            thread: worker index, used only in the log line.
        """
        while self.episode < self.EPISODES:
            # Reset episode
            score, done, SAVING = 0, False, ''
            state = np.array([self.reset(env)])

            states, actions, rewards = [], [], []
            memory_states, carry_states = [], []

            # 64 must match the number of LSTM units in OurModel.
            memory_state = np.zeros([1,64], dtype=np.float32)
            carry_state = np.zeros([1,64], dtype=np.float32)
            while not done:
                # Record the recurrent state that is fed INTO the network for
                # this step, so replay() starts from the same initial state
                # the rollout started from.
                memory_states.append(memory_state)
                carry_states.append(carry_state)

                action, memory_state, carry_state = agent.act(state, memory_state, carry_state)
                next_state, reward, done, _ = self.step(action, env)

                states.append(state)
                actions.append(action)
                rewards.append(reward)

                score += reward
                state = np.array([next_state])

            # The context manager releases the lock even if replay() raises.
            with self.lock:
                self.replay(states, actions, rewards, memory_states, carry_states)

            # Update episode count
            with self.lock:
                average = self.PlotModel(score, self.episode)
                # saving best models
                if average >= self.max_average:
                    self.max_average = average
                    #self.save()
                    SAVING = "SAVING"
                else:
                    SAVING = ""

                print("episode: {}/{}, thread: {}, score: {}, average: {:.2f} {}".format(self.episode, self.EPISODES, thread, score, average, SAVING))
                if(self.episode < self.EPISODES):
                    self.episode += 1

        env.close()

    def test(self, Actor_name, Critic_name=''):
        """Load a saved model and play 100 rendered episodes greedily.

        Args:
            Actor_name: path of the saved actor-critic model to load.
            Critic_name: unused; kept so existing two-argument calls still
                work (the network is a single combined model).
        """
        self.load(Actor_name)
        for e in range(100):
            state = np.array([self.reset(self.env)])
            # Start every episode from a zero LSTM state, as training does.
            memory_state = tf.zeros([1, 64], tf.float32)
            carry_state = tf.zeros([1, 64], tf.float32)
            done = False
            score = 0
            while not done:
                self.env.render()
                logits, _, memory_state, carry_state = self.ActorCritic(
                    state, memory_state, carry_state, training=False)
                action = int(np.argmax(logits[0]))
                next_state, reward, done, _ = self.step(action, self.env)
                state = np.array([next_state])
                score += reward
            print("episode: {}/{}, score: {}".format(e, self.EPISODES, score))

        self.env.close()


if __name__ == "__main__":
    # Environment id; 'Pong-v0' is the alternative that was tried.
    env_name = 'PongDeterministic-v4'
    agent = A3CAgent(env_name)

    # Train with worker threads (A3C style); a single worker is used here.
    agent.train(n_threads=1)

    # Disabled alternatives kept for reference:
    #agent.run() # use as A2C
    #agent.test('Models/Pong-v0_A3C_2.5e-05_Actor.h5', '')

2022-08-19 19:28:21.610508: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2022-08-19 19:28:22.902798: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-19 19:28:22.903566: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-19 19:28:22.926819: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-19 19:28:22.927161: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:09:00.0 name: NVIDIA GeForce RTX 3080 computeCapability: 8.6
coreClock: 1.71GHz coreCount: 68 deviceMemorySize: 9.78GiB deviceMemoryBandwidth: 707.88GiB/s
2022-08-19 19:28:22.927175: I tensorflow/stream_executor/platform/def

2022-08-19 19:28:28.383131: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:28.506625: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:28.623654: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:28.740881: W tensorflow/stream_executor/gpu/

2022-08-19 19:28:33.059749: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:33.197955: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:33.336547: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:33.483716: W tensorflow/stream_executor/gpu/

2022-08-19 19:28:45.022412: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:45.176222: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:45.329273: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:45.479639: W tensorflow/stream_executor/gpu/

2022-08-19 19:28:49.321450: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:49.470218: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:49.622306: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:49.777211: W tensorflow/stream_executor/gpu/

2022-08-19 19:28:53.663204: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:53.814499: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:53.966712: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:54.121556: W tensorflow/stream_executor/gpu/

2022-08-19 19:28:58.085826: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'

2022-08-19 19:28:58.241598: W tensorflow/stream_executor/gpu/asm_compiler.cc:235] Your CUDA software stack is old. We fallback to the NVIDIA driver for some compilation. Update your CUDA version to get the best performance. The ptxas error was: ptxas fatal   : Value 'sm_86' is not defined for option 'gpu-name'



episode: 0/2000000, thread: 0, score: -21.0, average: -21.00 SAVING
episode: 1/2000000, thread: 0, score: -21.0, average: -21.00 SAVING
episode: 2/2000000, thread: 0, score: -20.0, average: -20.67 SAVING
episode: 3/2000000, thread: 0, score: -20.0, average: -20.50 SAVING
episode: 4/2000000, thread: 0, score: -20.0, average: -20.40 SAVING
episode: 5/2000000, thread: 0, score: -20.0, average: -20.33 SAVING
episode: 6/2000000, thread: 0, score: -20.0, average: -20.29 SAVING
episode: 7/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 8/2000000, thread: 0, score: -21.0, average: -20.44 
episode: 9/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 10/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 11/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 12/2000000, thread: 0, score: -20.0, average: -20.38 
episode: 13/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 14/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 15/2000000, th

episode: 128/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 129/2000000, thread: 0, score: -20.0, average: -20.50 
episode: 130/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 131/2000000, thread: 0, score: -20.0, average: -20.54 
episode: 132/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 133/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 134/2000000, thread: 0, score: -18.0, average: -20.46 
episode: 135/2000000, thread: 0, score: -19.0, average: -20.42 
episode: 136/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 137/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 138/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 139/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 140/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 141/2000000, thread: 0, score: -20.0, average: -20.38 
episode: 142/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 143/2000000, thread: 0, score: 

episode: 257/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 258/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 259/2000000, thread: 0, score: -20.0, average: -20.38 
episode: 260/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 261/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 262/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 263/2000000, thread: 0, score: -18.0, average: -20.32 
episode: 264/2000000, thread: 0, score: -19.0, average: -20.28 
episode: 265/2000000, thread: 0, score: -19.0, average: -20.24 
episode: 266/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 267/2000000, thread: 0, score: -21.0, average: -20.30 
episode: 268/2000000, thread: 0, score: -19.0, average: -20.28 
episode: 269/2000000, thread: 0, score: -21.0, average: -20.30 
episode: 270/2000000, thread: 0, score: -19.0, average: -20.26 
episode: 271/2000000, thread: 0, score: -19.0, average: -20.22 
episode: 272/2000000, thread: 0, score: 

episode: 386/2000000, thread: 0, score: -20.0, average: -20.64 
episode: 387/2000000, thread: 0, score: -20.0, average: -20.62 
episode: 388/2000000, thread: 0, score: -21.0, average: -20.64 
episode: 389/2000000, thread: 0, score: -20.0, average: -20.62 
episode: 390/2000000, thread: 0, score: -21.0, average: -20.62 
episode: 391/2000000, thread: 0, score: -21.0, average: -20.64 
episode: 392/2000000, thread: 0, score: -19.0, average: -20.62 
episode: 393/2000000, thread: 0, score: -20.0, average: -20.62 
episode: 394/2000000, thread: 0, score: -20.0, average: -20.62 
episode: 395/2000000, thread: 0, score: -21.0, average: -20.62 
episode: 396/2000000, thread: 0, score: -21.0, average: -20.62 
episode: 397/2000000, thread: 0, score: -20.0, average: -20.60 
episode: 398/2000000, thread: 0, score: -20.0, average: -20.58 
episode: 399/2000000, thread: 0, score: -19.0, average: -20.56 
episode: 400/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 401/2000000, thread: 0, score: 

episode: 515/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 516/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 517/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 518/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 519/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 520/2000000, thread: 0, score: -18.0, average: -20.18 
episode: 521/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 522/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 523/2000000, thread: 0, score: -21.0, average: -20.22 
episode: 524/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 525/2000000, thread: 0, score: -20.0, average: -20.22 
episode: 526/2000000, thread: 0, score: -20.0, average: -20.22 
episode: 527/2000000, thread: 0, score: -21.0, average: -20.22 
episode: 528/2000000, thread: 0, score: -21.0, average: -20.28 
episode: 529/2000000, thread: 0, score: -21.0, average: -20.30 
episode: 530/2000000, thread: 0, score: 

episode: 644/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 645/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 646/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 647/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 648/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 649/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 650/2000000, thread: 0, score: -18.0, average: -20.46 
episode: 651/2000000, thread: 0, score: -21.0, average: -20.48 
episode: 652/2000000, thread: 0, score: -19.0, average: -20.46 
episode: 653/2000000, thread: 0, score: -21.0, average: -20.46 
episode: 654/2000000, thread: 0, score: -20.0, average: -20.44 
episode: 655/2000000, thread: 0, score: -18.0, average: -20.38 
episode: 656/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 657/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 658/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 659/2000000, thread: 0, score: 

episode: 773/2000000, thread: 0, score: -21.0, average: -20.48 
episode: 774/2000000, thread: 0, score: -21.0, average: -20.48 
episode: 775/2000000, thread: 0, score: -19.0, average: -20.44 
episode: 776/2000000, thread: 0, score: -21.0, average: -20.44 
episode: 777/2000000, thread: 0, score: -21.0, average: -20.44 
episode: 778/2000000, thread: 0, score: -21.0, average: -20.44 
episode: 779/2000000, thread: 0, score: -20.0, average: -20.42 
episode: 780/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 781/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 782/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 783/2000000, thread: 0, score: -21.0, average: -20.44 
episode: 784/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 785/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 786/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 787/2000000, thread: 0, score: -20.0, average: -20.50 
episode: 788/2000000, thread: 0, score: 

episode: 902/2000000, thread: 0, score: -20.0, average: -20.50 
episode: 903/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 904/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 905/2000000, thread: 0, score: -20.0, average: -20.48 
episode: 906/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 907/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 908/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 909/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 910/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 911/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 912/2000000, thread: 0, score: -18.0, average: -20.54 
episode: 913/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 914/2000000, thread: 0, score: -20.0, average: -20.50 
episode: 915/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 916/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 917/2000000, thread: 0, score: 

episode: 1030/2000000, thread: 0, score: -20.0, average: -20.42 
episode: 1031/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 1032/2000000, thread: 0, score: -20.0, average: -20.38 
episode: 1033/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 1034/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 1035/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 1036/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 1037/2000000, thread: 0, score: -20.0, average: -20.38 
episode: 1038/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 1039/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 1040/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 1041/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 1042/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 1043/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 1044/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 1045/2000000, th

episode: 1157/2000000, thread: 0, score: -21.0, average: -20.56 
episode: 1158/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 1159/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 1160/2000000, thread: 0, score: -21.0, average: -20.58 
episode: 1161/2000000, thread: 0, score: -20.0, average: -20.56 
episode: 1162/2000000, thread: 0, score: -20.0, average: -20.56 
episode: 1163/2000000, thread: 0, score: -20.0, average: -20.54 
episode: 1164/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 1165/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 1166/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 1167/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 1168/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 1169/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 1170/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 1171/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 1172/2000000, th

episode: 1284/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 1285/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 1286/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 1287/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 1288/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 1289/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 1290/2000000, thread: 0, score: -20.0, average: -20.38 
episode: 1291/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 1292/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 1293/2000000, thread: 0, score: -19.0, average: -20.32 
episode: 1294/2000000, thread: 0, score: -19.0, average: -20.30 
episode: 1295/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 1296/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 1297/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 1298/2000000, thread: 0, score: -19.0, average: -20.36 
episode: 1299/2000000, th

episode: 1409/2000000, thread: 0, score: -21.0, average: -20.56 
episode: 1410/2000000, thread: 0, score: -21.0, average: -20.56 
episode: 1411/2000000, thread: 0, score: -20.0, average: -20.54 
episode: 1412/2000000, thread: 0, score: -20.0, average: -20.56 
episode: 1413/2000000, thread: 0, score: -19.0, average: -20.52 
episode: 1414/2000000, thread: 0, score: -21.0, average: -20.52 
episode: 1415/2000000, thread: 0, score: -20.0, average: -20.50 
episode: 1416/2000000, thread: 0, score: -20.0, average: -20.48 
episode: 1417/2000000, thread: 0, score: -20.0, average: -20.48 
episode: 1418/2000000, thread: 0, score: -21.0, average: -20.48 
episode: 1419/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 1420/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 1421/2000000, thread: 0, score: -21.0, average: -20.54 
episode: 1422/2000000, thread: 0, score: -20.0, average: -20.54 
episode: 1423/2000000, thread: 0, score: -20.0, average: -20.52 
episode: 1424/2000000, th

episode: 1536/2000000, thread: 0, score: -18.0, average: -20.14 
episode: 1537/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 1538/2000000, thread: 0, score: -19.0, average: -20.12 
episode: 1539/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 1540/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 1541/2000000, thread: 0, score: -18.0, average: -20.06 
episode: 1542/2000000, thread: 0, score: -20.0, average: -20.06 
episode: 1543/2000000, thread: 0, score: -21.0, average: -20.08 
episode: 1544/2000000, thread: 0, score: -21.0, average: -20.08 
episode: 1545/2000000, thread: 0, score: -19.0, average: -20.10 
episode: 1546/2000000, thread: 0, score: -18.0, average: -20.06 
episode: 1547/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 1548/2000000, thread: 0, score: -21.0, average: -20.10 
episode: 1549/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 1550/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 1551/2000000, th

episode: 1663/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 1664/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 1665/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 1666/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 1667/2000000, thread: 0, score: -21.0, average: -20.10 
episode: 1668/2000000, thread: 0, score: -19.0, average: -20.06 
episode: 1669/2000000, thread: 0, score: -20.0, average: -20.04 
episode: 1670/2000000, thread: 0, score: -20.0, average: -20.04 
episode: 1671/2000000, thread: 0, score: -21.0, average: -20.10 
episode: 1672/2000000, thread: 0, score: -19.0, average: -20.08 
episode: 1673/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 1674/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 1675/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 1676/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 1677/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 1678/2000000, th

episode: 1790/2000000, thread: 0, score: -19.0, average: -20.22 
episode: 1791/2000000, thread: 0, score: -19.0, average: -20.18 
episode: 1792/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 1793/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 1794/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 1795/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 1796/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 1797/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 1798/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 1799/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 1800/2000000, thread: 0, score: -19.0, average: -20.10 
episode: 1801/2000000, thread: 0, score: -21.0, average: -20.10 
episode: 1802/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 1803/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 1804/2000000, thread: 0, score: -19.0, average: -20.10 
episode: 1805/2000000, th

episode: 1917/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 1918/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 1919/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 1920/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 1921/2000000, thread: 0, score: -21.0, average: -20.20 
episode: 1922/2000000, thread: 0, score: -21.0, average: -20.22 
episode: 1923/2000000, thread: 0, score: -21.0, average: -20.22 
episode: 1924/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 1925/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 1926/2000000, thread: 0, score: -19.0, average: -20.24 
episode: 1927/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 1928/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 1929/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 1930/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 1931/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 1932/2000000, th

episode: 2044/2000000, thread: 0, score: -20.0, average: -20.22 
episode: 2045/2000000, thread: 0, score: -19.0, average: -20.20 
episode: 2046/2000000, thread: 0, score: -20.0, average: -20.18 
episode: 2047/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 2048/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 2049/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 2050/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 2051/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 2052/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 2053/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2054/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2055/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2056/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 2057/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 2058/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2059/2000000, th

episode: 2171/2000000, thread: 0, score: -20.0, average: -20.18 
episode: 2172/2000000, thread: 0, score: -18.0, average: -20.14 
episode: 2173/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2174/2000000, thread: 0, score: -19.0, average: -20.10 
episode: 2175/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 2176/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 2177/2000000, thread: 0, score: -19.0, average: -20.06 
episode: 2178/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 2179/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 2180/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 2181/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 2182/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 2183/2000000, thread: 0, score: -21.0, average: -20.08 
episode: 2184/2000000, thread: 0, score: -19.0, average: -20.08 
episode: 2185/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 2186/2000000, th

episode: 2298/2000000, thread: 0, score: -21.0, average: -20.50 
episode: 2299/2000000, thread: 0, score: -19.0, average: -20.46 
episode: 2300/2000000, thread: 0, score: -19.0, average: -20.42 
episode: 2301/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 2302/2000000, thread: 0, score: -20.0, average: -20.44 
episode: 2303/2000000, thread: 0, score: -21.0, average: -20.46 
episode: 2304/2000000, thread: 0, score: -20.0, average: -20.46 
episode: 2305/2000000, thread: 0, score: -21.0, average: -20.48 
episode: 2306/2000000, thread: 0, score: -20.0, average: -20.46 
episode: 2307/2000000, thread: 0, score: -19.0, average: -20.44 
episode: 2308/2000000, thread: 0, score: -19.0, average: -20.42 
episode: 2309/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 2310/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 2311/2000000, thread: 0, score: -19.0, average: -20.40 
episode: 2312/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 2313/2000000, th

episode: 2425/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 2426/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 2427/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 2428/2000000, thread: 0, score: -21.0, average: -20.30 
episode: 2429/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 2430/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 2431/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 2432/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 2433/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 2434/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 2435/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 2436/2000000, thread: 0, score: -18.0, average: -20.34 
episode: 2437/2000000, thread: 0, score: -21.0, average: -20.38 
episode: 2438/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 2439/2000000, thread: 0, score: -20.0, average: -20.36 
episode: 2440/2000000, th

episode: 2552/2000000, thread: 0, score: -21.0, average: -20.04 
episode: 2553/2000000, thread: 0, score: -21.0, average: -20.06 
episode: 2554/2000000, thread: 0, score: -21.0, average: -20.08 
episode: 2555/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 2556/2000000, thread: 0, score: -19.0, average: -20.08 
episode: 2557/2000000, thread: 0, score: -20.0, average: -20.06 
episode: 2558/2000000, thread: 0, score: -21.0, average: -20.08 
episode: 2559/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 2560/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2561/2000000, thread: 0, score: -19.0, average: -20.08 
episode: 2562/2000000, thread: 0, score: -19.0, average: -20.08 
episode: 2563/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 2564/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 2565/2000000, thread: 0, score: -21.0, average: -20.10 
episode: 2566/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2567/2000000, th

episode: 2679/2000000, thread: 0, score: -21.0, average: -20.30 
episode: 2680/2000000, thread: 0, score: -20.0, average: -20.30 
episode: 2681/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 2682/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 2683/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 2684/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 2685/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 2686/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 2687/2000000, thread: 0, score: -21.0, average: -20.28 
episode: 2688/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 2689/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 2690/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 2691/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 2692/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 2693/2000000, thread: 0, score: -19.0, average: -20.24 
episode: 2694/2000000, th

episode: 2806/2000000, thread: 0, score: -21.0, average: -20.20 
episode: 2807/2000000, thread: 0, score: -20.0, average: -20.22 
episode: 2808/2000000, thread: 0, score: -19.0, average: -20.20 
episode: 2809/2000000, thread: 0, score: -18.0, average: -20.16 
episode: 2810/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 2811/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 2812/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2813/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 2814/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 2815/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 2816/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 2817/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 2818/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 2819/2000000, thread: 0, score: -20.0, average: -20.16 
episode: 2820/2000000, thread: 0, score: -18.0, average: -20.12 
episode: 2821/2000000, th

episode: 2933/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 2934/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 2935/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 2936/2000000, thread: 0, score: -20.0, average: -20.32 
episode: 2937/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 2938/2000000, thread: 0, score: -19.0, average: -20.34 
episode: 2939/2000000, thread: 0, score: -21.0, average: -20.34 
episode: 2940/2000000, thread: 0, score: -19.0, average: -20.30 
episode: 2941/2000000, thread: 0, score: -18.0, average: -20.26 
episode: 2942/2000000, thread: 0, score: -19.0, average: -20.26 
episode: 2943/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 2944/2000000, thread: 0, score: -19.0, average: -20.20 
episode: 2945/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 2946/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 2947/2000000, thread: 0, score: -21.0, average: -20.28 
episode: 2948/2000000, th

episode: 3060/2000000, thread: 0, score: -21.0, average: -20.44 
episode: 3061/2000000, thread: 0, score: -20.0, average: -20.42 
episode: 3062/2000000, thread: 0, score: -18.0, average: -20.36 
episode: 3063/2000000, thread: 0, score: -19.0, average: -20.32 
episode: 3064/2000000, thread: 0, score: -21.0, average: -20.32 
episode: 3065/2000000, thread: 0, score: -21.0, average: -20.32 
episode: 3066/2000000, thread: 0, score: -20.0, average: -20.32 
episode: 3067/2000000, thread: 0, score: -19.0, average: -20.28 
episode: 3068/2000000, thread: 0, score: -21.0, average: -20.30 
episode: 3069/2000000, thread: 0, score: -20.0, average: -20.30 
episode: 3070/2000000, thread: 0, score: -20.0, average: -20.28 
episode: 3071/2000000, thread: 0, score: -21.0, average: -20.32 
episode: 3072/2000000, thread: 0, score: -21.0, average: -20.36 
episode: 3073/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 3074/2000000, thread: 0, score: -20.0, average: -20.40 
episode: 3075/2000000, th

episode: 3187/2000000, thread: 0, score: -21.0, average: -20.24 
episode: 3188/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 3189/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 3190/2000000, thread: 0, score: -20.0, average: -20.24 
episode: 3191/2000000, thread: 0, score: -21.0, average: -20.28 
episode: 3192/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 3193/2000000, thread: 0, score: -21.0, average: -20.26 
episode: 3194/2000000, thread: 0, score: -20.0, average: -20.26 
episode: 3195/2000000, thread: 0, score: -21.0, average: -20.28 
episode: 3196/2000000, thread: 0, score: -21.0, average: -20.32 
episode: 3197/2000000, thread: 0, score: -21.0, average: -20.32 
episode: 3198/2000000, thread: 0, score: -20.0, average: -20.34 
episode: 3199/2000000, thread: 0, score: -21.0, average: -20.40 
episode: 3200/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 3201/2000000, thread: 0, score: -21.0, average: -20.42 
episode: 3202/2000000, th

episode: 3314/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 3315/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 3316/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 3317/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 3318/2000000, thread: 0, score: -20.0, average: -20.08 
episode: 3319/2000000, thread: 0, score: -20.0, average: -20.06 
episode: 3320/2000000, thread: 0, score: -21.0, average: -20.10 
episode: 3321/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 3322/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 3323/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 3324/2000000, thread: 0, score: -19.0, average: -20.12 
episode: 3325/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 3326/2000000, thread: 0, score: -19.0, average: -20.12 
episode: 3327/2000000, thread: 0, score: -20.0, average: -20.10 
episode: 3328/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 3329/2000000, th

episode: 3441/2000000, thread: 0, score: -21.0, average: -20.02 
episode: 3442/2000000, thread: 0, score: -21.0, average: -20.04 
episode: 3443/2000000, thread: 0, score: -21.0, average: -20.04 
episode: 3444/2000000, thread: 0, score: -19.0, average: -20.02 
episode: 3445/2000000, thread: 0, score: -19.0, average: -20.00 
episode: 3446/2000000, thread: 0, score: -20.0, average: -20.00 
episode: 3447/2000000, thread: 0, score: -21.0, average: -20.04 
episode: 3448/2000000, thread: 0, score: -21.0, average: -20.06 
episode: 3449/2000000, thread: 0, score: -18.0, average: -20.02 
episode: 3450/2000000, thread: 0, score: -21.0, average: -20.06 
episode: 3451/2000000, thread: 0, score: -20.0, average: -20.04 
episode: 3452/2000000, thread: 0, score: -20.0, average: -20.04 
episode: 3453/2000000, thread: 0, score: -20.0, average: -20.02 
episode: 3454/2000000, thread: 0, score: -20.0, average: -20.00 
episode: 3455/2000000, thread: 0, score: -20.0, average: -19.98 
episode: 3456/2000000, th

episode: 3568/2000000, thread: 0, score: -20.0, average: -20.14 
episode: 3569/2000000, thread: 0, score: -20.0, average: -20.12 
episode: 3570/2000000, thread: 0, score: -19.0, average: -20.10 
episode: 3571/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 3572/2000000, thread: 0, score: -21.0, average: -20.12 
episode: 3573/2000000, thread: 0, score: -21.0, average: -20.14 
episode: 3574/2000000, thread: 0, score: -20.0, average: -20.18 
episode: 3575/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 3576/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 3577/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 3578/2000000, thread: 0, score: -21.0, average: -20.18 
episode: 3579/2000000, thread: 0, score: -19.0, average: -20.18 
episode: 3580/2000000, thread: 0, score: -19.0, average: -20.16 
episode: 3581/2000000, thread: 0, score: -19.0, average: -20.16 
episode: 3582/2000000, thread: 0, score: -21.0, average: -20.16 
episode: 3583/2000000, th