In [None]:
import os
import random
import gym
import pylab
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Lambda, Add, Conv2D, Flatten
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import backend as K
import cv2
import threading
from threading import Thread, Lock
import time
import tensorflow_probability as tfp
from typing import Any, List, Sequence, Tuple

# Set CUDA_VISIBLE_DEVICES to '-1' for this process
# (presumably to force CPU-only execution -- confirm on the target machine).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# Disabled alternative: run on the first GPU with a 4000 MB virtual-device limit.
#gpus = tf.config.experimental.list_physical_devices('GPU')
#tf.config.experimental.set_virtual_device_configuration(gpus[0],
#          [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)])

# Short alias for the TensorFlow Probability distributions module.
tfd = tfp.distributions

class OurModel(tf.keras.Model):
    """Actor-critic network with a shared trunk and two linear heads.

    The input is flattened and passed through one 512-unit ReLU layer; the
    result feeds a policy head (``action_space`` logits) and a value head
    (one scalar per sample).
    """

    def __init__(self, input_shape, action_space):
        """Create the layers.

        Args:
            input_shape: Accepted for interface compatibility; not used here.
            action_space: Number of outputs of the policy head.
        """
        super().__init__()

        # Shared trunk.
        self.flatten = Flatten()
        self.dense_0 = Dense(512, activation='relu')
        # Policy head (unnormalised logits) and value head.
        self.dense_1 = Dense(action_space)
        self.dense_2 = Dense(1)

    def call(self, X_input):
        """Return ``(action_logit, value)`` for the batch ``X_input``."""
        hidden = self.dense_0(self.flatten(X_input))
        return self.dense_1(hidden), self.dense_2(hidden)


def safe_log(x):
    """Element-wise logarithm that yields 0 wherever ``x`` is exactly 0.

    Everywhere else the result is ``log(max(x, 1e-12))``, so inputs below
    1e-12 are clamped before the logarithm is taken.
    """
    is_zero = tf.math.equal(x, 0)
    clamped_log = tf.math.log(tf.math.maximum(1e-12, x))
    return tf.where(is_zero, tf.zeros_like(x), clamped_log)


def take_vector_elements(vectors, indices):
    """
    Pick one component from every row of a batch of vectors.
    Args:
      vectors: a [batch x dims] Tensor.
      indices: an int32 Tensor with `batch` entries; entry i is the
        component to take from row i.
    Returns:
      A Tensor with `batch` entries, one for each vector.
    """
    batch_size = tf.shape(vectors)[0]
    row_ids = tf.range(batch_size)
    # Pair every row number with the requested column: [[0, i0], [1, i1], ...].
    coordinates = tf.stack([row_ids, indices], axis=1)
    return tf.gather_nd(vectors, coordinates)


# Pre-built Keras loss objects. None of them is referenced by the code
# visible in this file (IMPALA_Agent.update computes its losses by hand).
# Huber loss, summed over the batch.
huber_loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)
# Sparse categorical cross-entropy on raw logits, summed over the batch.
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.SUM)
# Mean squared error with the default reduction.
mse_loss = tf.keras.losses.MeanSquaredError()


class IMPALA_Agent:
    # IMPALA Main Optimization Algorithm
    def __init__(self, env_name):
        """Create the environment, hyper-parameters, buffers and the network.

        Args:
            env_name: Gym environment id handed to ``gym.make``.
        """
        # Environment and the size of its discrete action space.
        self.env_name = env_name
        self.env = gym.make(env_name)
        self.action_size = self.env.action_space.n

        # Episode budget, global episode counter and best running average
        # seen so far (-21.0 is specific for pong).
        self.EPISODES = 2000000
        self.episode = 0
        self.max_average = -21.0

        # One trajectory buffer per worker thread (threads 0 and 1), plus the
        # lock that guards the shared episode bookkeeping.
        self.memory_size = 50000
        self.memory_1 = []
        self.memory_2 = []
        self.lock = Lock()
        self.lr = 0.0001

        # Number of consecutive transitions used for one gradient step.
        self.batch_size = 512

        # Observation layout: REM_STEP stacked frames of COLS x ROWS pixels.
        self.ROWS = 80
        self.COLS = 80
        self.REM_STEP = 4
        self.state_size = (self.COLS, self.ROWS, self.REM_STEP)
        self.image_memory = np.zeros(self.state_size)

        # Score history used for plotting.
        self.scores = []
        self.episodes = []
        self.average = []

        # Output locations: model directory plus a run name built from the
        # environment id and the learning rate.
        self.Save_Path = 'Models'
        if not os.path.exists(self.Save_Path):
            os.makedirs(self.Save_Path)
        self.path = '{}_IMPALA_{}'.format(self.env_name, self.lr)
        self.model_name = os.path.join(self.Save_Path, self.path)

        # Actor-critic network and its optimizer.
        self.model = OurModel(input_shape=self.state_size, action_space=self.action_size)
        self.optimizer = tf.keras.optimizers.Adam(self.lr)

    def remember_1(self, state, action, policy, reward, done):
        """Append one transition to ``memory_1``.

        Once the buffer holds more than ``memory_size`` entries it is emptied
        instead, and the transition passed in on that call is not stored.
        """
        if len(self.memory_1) > self.memory_size:
            self.memory_1 = []
            return
        self.memory_1.append((state, action, policy, reward, done))
            
    def remember_2(self, state, action, policy, reward, done):
        """Append one transition to ``memory_2``.

        Once the buffer holds more than ``memory_size`` entries it is emptied
        instead, and the transition passed in on that call is not stored.
        """
        if len(self.memory_2) > self.memory_size:
            self.memory_2 = []
            return
        self.memory_2.append((state, action, policy, reward, done))
            
    def act(self, state):
        """Sample an action for ``state`` from the current policy.

        Returns:
            ``(action, policy)``: the sampled action of the first batch entry
            as a Python int, and the policy-head logits the sample was drawn
            from.
        """
        logits = self.model(state, training=False)[0]
        sampled = tfd.Categorical(logits=logits).sample()
        return int(sampled[0]), logits

    def update(self, states, actions, agent_policies, rewards, dones):
        """Run one V-trace actor-critic gradient step on a contiguous trajectory.

        Index ``t`` of every argument describes one stored transition:
        ``states[t]`` is the observation, ``actions[t]`` the action taken in
        it, ``agent_policies[t]`` the behaviour-policy logits the action was
        sampled from, and ``rewards[t]`` / ``dones[t]`` what the environment
        returned for that action. The final entry only supplies the bootstrap
        value, so a trajectory of length T yields T-1 training transitions.

        Args:
            states: Batch of observations, first axis is time.
            actions: Integer actions, one per time step.
            agent_policies: Behaviour logits, ``[time, action_size]``.
            rewards: Rewards, one per time step.
            dones: Episode-termination flags, one per time step.
        """
        # At least one transition plus a bootstrap state is required.
        if len(actions) < 2:
            return

        discounting = 0.99
        lambda_ = 1.0
        clip_rho_threshold = 1.0
        clip_pg_rho_threshold = 1.0
        baseline_cost = 0.5

        # Transition t is (s_t, a_t) -> (r_t, done_t, s_{t+1}). The reward and
        # done flag were stored together with the action that produced them,
        # so all per-transition quantities drop the LAST entry (its successor
        # state is not part of the batch).
        actions = tf.convert_to_tensor(actions[:-1], dtype=tf.int32)
        behaviour_logits = tf.convert_to_tensor(agent_policies[:-1], dtype=tf.float32)
        rewards = tf.convert_to_tensor(rewards[:-1], dtype=tf.float32)
        # Discount is zero on terminal transitions so values never leak
        # across an episode boundary.
        discounts = (1.0 - tf.cast(dones[:-1], tf.float32)) * discounting

        with tf.GradientTape() as tape:
            learner_outputs = self.model(states, training=True)
            learner_logits = learner_outputs[0][:-1]
            learner_values = tf.squeeze(learner_outputs[1], axis=-1)

            bootstrap_value = learner_values[-1]
            values = learner_values[:-1]

            # log pi(a|s) for the learner (target) and the actor (behaviour).
            # log_softmax is used instead of log(softmax(.)) so that very
            # small probabilities do not underflow to log(0).
            target_action_log_probs = take_vector_elements(
                tf.nn.log_softmax(learner_logits), actions)
            behaviour_action_log_probs = take_vector_elements(
                tf.nn.log_softmax(behaviour_logits), actions)

            # Truncated importance weights.
            rhos = tf.math.exp(target_action_log_probs - behaviour_action_log_probs)
            clipped_rhos = tf.minimum(clip_rho_threshold, rhos, name='clipped_rhos')
            cs = lambda_ * tf.minimum(1.0, rhos, name='cs')

            # One-step TD errors weighted by the clipped importance ratios.
            values_t_plus_1 = tf.concat([values[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
            deltas = clipped_rhos * (rewards + discounts * values_t_plus_1 - values)

            # Backward recursion: acc_t = delta_t + discount_t * c_t * acc_{t+1}.
            acc = tf.zeros_like(bootstrap_value)
            vs_minus_v_xs = []
            for i in reversed(range(int(discounts.shape[0]))):
                acc = deltas[i] + discounts[i] * cs[i] * acc
                vs_minus_v_xs.append(acc)
            vs_minus_v_xs = tf.convert_to_tensor(vs_minus_v_xs[::-1], dtype=tf.float32)

            # V-trace value targets and policy-gradient advantages.
            vs = tf.add(vs_minus_v_xs, values, name='vs')
            vs_t_plus_1 = tf.concat([vs[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
            clipped_pg_rhos = tf.minimum(clip_pg_rho_threshold, rhos, name='clipped_pg_rhos')
            pg_advantages = clipped_pg_rhos * (rewards + discounts * vs_t_plus_1 - values)

            # Targets are treated as constants by both losses.
            vs = tf.stop_gradient(vs)
            pg_advantages = tf.stop_gradient(pg_advantages)

            actor_loss = -tf.reduce_mean(target_action_log_probs * pg_advantages)
            critic_loss = baseline_cost * 0.5 * tf.reduce_mean(tf.square(values - vs))
            total_loss = actor_loss + critic_loss

        grads = tape.gradient(total_loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
    
    def replay_1(self):
        """Train on one random contiguous slice of ``memory_1``.

        Does nothing until the buffer holds more than ``batch_size``
        transitions. Otherwise ``batch_size`` consecutive transitions are
        copied into dense arrays and handed to ``update``.
        """
        memory_len = len(self.memory_1)
        if memory_len <= self.batch_size:
            return

        # A contiguous window keeps the time ordering that update() relies on.
        start_index = random.randint(0, memory_len - self.batch_size)
        minibatch = self.memory_1[start_index:start_index + self.batch_size]

        states = np.zeros((self.batch_size, *self.state_size), dtype=np.float32)
        actions = np.zeros(self.batch_size, dtype=np.int32)
        policies = np.zeros((self.batch_size, self.action_size), dtype=np.float32)
        rewards = np.zeros(self.batch_size, dtype=np.float32)
        # Builtin bool: the np.bool alias no longer exists in current NumPy.
        dones = np.zeros(self.batch_size, dtype=bool)
        for i, (state, action, policy, reward, done) in enumerate(minibatch):
            states[i] = state
            actions[i] = action
            policies[i] = policy
            rewards[i] = reward
            dones[i] = done

        self.update(states, actions, policies, rewards, dones)
        
    def replay_2(self):
        """Train on one random contiguous slice of ``memory_2``.

        Does nothing until the buffer holds more than ``batch_size``
        transitions. Otherwise ``batch_size`` consecutive transitions are
        copied into dense arrays and handed to ``update``.
        """
        memory_len = len(self.memory_2)
        if memory_len <= self.batch_size:
            return

        # A contiguous window keeps the time ordering that update() relies on.
        start_index = random.randint(0, memory_len - self.batch_size)
        minibatch = self.memory_2[start_index:start_index + self.batch_size]

        states = np.zeros((self.batch_size, *self.state_size), dtype=np.float32)
        actions = np.zeros(self.batch_size, dtype=np.int32)
        policies = np.zeros((self.batch_size, self.action_size), dtype=np.float32)
        rewards = np.zeros(self.batch_size, dtype=np.float32)
        # Builtin bool: the np.bool alias no longer exists in current NumPy.
        dones = np.zeros(self.batch_size, dtype=bool)
        for i, (state, action, policy, reward, done) in enumerate(minibatch):
            states[i] = state
            actions[i] = action
            policies[i] = policy
            rewards[i] = reward
            dones[i] = done

        self.update(states, actions, policies, rewards, dones)
        
    def load(self, model_name):
        """Replace ``self.model`` with the model stored at ``model_name`` (loaded uncompiled)."""
        restored = load_model(model_name, compile=False)
        self.model = restored

    def save(self):
        """Save ``self.model`` to the path held in ``self.model_name``."""
        destination = self.model_name
        self.model.save(destination)

    pylab.figure(figsize=(18, 9))
    def PlotModel(self, score, episode):
        """Record an episode result and periodically save the learning curve.

        Args:
            score: Score of the episode that just finished.
            episode: Episode number used as the x coordinate.

        Returns:
            The mean of the most recent (up to) 50 recorded scores.
        """
        self.scores.append(score)
        self.episodes.append(episode)
        recent_scores = self.scores[-50:]
        self.average.append(sum(recent_scores) / len(recent_scores))

        # Redraw only when the decimal form of the episode number ends in "00".
        if str(episode).endswith("00"):
            # NOTE(review): the figure is never cleared, so every redraw adds
            # two more curves on top of the earlier ones.
            pylab.plot(self.episodes, self.scores, 'b')
            pylab.plot(self.episodes, self.average, 'r')
            pylab.ylabel('Score', fontsize=18)
            # NOTE(review): x values are episode numbers although the label
            # says 'Steps'.
            pylab.xlabel('Steps', fontsize=18)
            try:
                pylab.savefig(self.path + ".png")
            except OSError:
                # Best effort: a failed save must not interrupt training.
                pass

        return self.average[-1]
    
    def imshow(self, image, rem_step=0):
        """Show channel ``rem_step`` of ``image`` in an OpenCV window.

        The window is named ``"pong" + str(rem_step)``. The call waits 25 ms
        for a key press and closes all OpenCV windows when ``q`` is pressed.
        """
        window_title = "pong" + str(rem_step)
        channel = image[:, :, rem_step]
        cv2.imshow(window_title, channel)

        pressed_key = cv2.waitKey(25) & 0xFF
        if pressed_key == ord("q"):
            cv2.destroyAllWindows()

    def GetImage(self, frame):
        """Preprocess a raw frame and push it onto the caller's frame stack.

        The frame is cropped/subsampled, converted to grayscale, binarised at
        a threshold of 100 and stored as the newest channel of a stack of
        ``REM_STEP`` frames.

        Every thread gets its own frame stack. ``train`` runs several worker
        threads, each with its own environment, through this same agent; with
        a single shared stack the frames of different environments would be
        interleaved in one observation.

        Args:
            frame: Raw image with the colour channels on the last axis.

        Returns:
            The updated frame stack with a leading batch axis of size 1,
            i.e. shape ``(1, *self.state_size)``.
        """
        # Crop rows 35..194 and keep every second pixel on both axes
        # (intended to give an 80x80 image).
        frame_cropped = frame[35:195:2, ::2, :]
        if frame_cropped.shape[0] != self.COLS or frame_cropped.shape[1] != self.ROWS:
            # Unexpected frame size: fall back to resizing the whole frame.
            frame_cropped = cv2.resize(frame, (self.COLS, self.ROWS), interpolation=cv2.INTER_CUBIC)

        # Weighted channel sum -> single-channel grayscale image.
        frame_gray = (0.299 * frame_cropped[:, :, 0]
                      + 0.587 * frame_cropped[:, :, 1]
                      + 0.114 * frame_cropped[:, :, 2])

        # Binarise: pixels >= 100 become 1.0, everything else 0.0.
        new_frame = (frame_gray >= 100).astype(np.float32)

        # Fetch (or lazily create) the frame stack owned by the calling thread.
        thread_state = vars(self).setdefault('_frame_stack_local', threading.local())
        stack = getattr(thread_state, 'stack', None)
        if stack is None:
            stack = np.zeros(self.state_size)

        # Shift every frame one slot back (np.roll returns a new array, so
        # stacks returned earlier are not modified) and put the new frame in
        # slot 0.
        stack = np.roll(stack, 1, axis=2)
        stack[:, :, 0] = new_frame
        thread_state.stack = stack

        # Keep the public attribute pointing at the most recently built stack.
        self.image_memory = stack

        return np.expand_dims(stack, axis=0)
        
    def reset(self, env):
        """Reset ``env`` and return the initial stacked observation.

        The first frame is pushed ``REM_STEP`` times so that every slot of
        the frame stack holds it.
        """
        first_frame = env.reset()
        for _ in range(self.REM_STEP):
            state = self.GetImage(first_frame)

        return state

    def step(self, action, env):
        """Apply ``action`` to ``env`` and return the preprocessed result.

        Returns:
            ``(next_state, reward, done, info)`` where ``next_state`` is the
            frame stack produced by ``GetImage`` for the new raw frame.
        """
        raw_frame, reward, done, info = env.step(action)
        return self.GetImage(raw_frame), reward, done, info
    
    def train(self, n_threads):
        """Run ``train_threading`` in ``n_threads`` daemon threads and wait for them.

        The agent's own environment is closed first; each worker receives a
        freshly created environment and its thread index.
        """
        self.env.close()

        # One environment per worker.
        envs = [gym.make(self.env_name) for _ in range(n_threads)]

        workers = []
        for index, env in enumerate(envs):
            workers.append(threading.Thread(
                target=self.train_threading,
                daemon=True,
                args=(self, env, index)))

        # Stagger the start-up of the workers by two seconds each.
        for worker in workers:
            time.sleep(2)
            worker.start()

        for worker in workers:
            time.sleep(10)
            worker.join()
    
    def render(self, obs):
        """Display ``obs`` in the OpenCV window named ``'obs'``."""
        window_name = 'obs'
        cv2.imshow(window_name, obs)
        # Wait 1 ms for a key event; the return value is ignored.
        cv2.waitKey(1)
    
    def train_threading(self, agent, env, thread):
        """Worker loop: play episodes in ``env``, store transitions and train.

        Thread 0 writes to ``memory_1`` and trains via ``replay_1`` every 200
        of its steps; thread 1 uses ``memory_2`` / ``replay_2`` every 201
        steps. Any other thread index only plays episodes. After each episode
        the shared statistics are updated under ``self.lock``.

        Args:
            agent: Not used by this method.
            env: Environment owned by this worker.
            thread: Index of the worker thread.
        """
        # NOTE(review): replay_1/replay_2 update the shared model without
        # holding self.lock.
        total_step_1 = 0
        total_step_2 = 0
        for _episode in range(self.EPISODES):
            state = self.reset(env)
            score = 0

            while True:
                action, policy = self.act(state)
                next_state, reward, done, _info = self.step(action, env)

                # Rewards are stored divided by 20.
                if thread == 0:
                    self.remember_1(state, action, policy, reward / 20.0, done)
                elif thread == 1:
                    self.remember_2(state, action, policy, reward / 20.0, done)

                state = next_state
                score += reward
                if done:
                    break

                # Step counters advance (and training may run) only on
                # non-terminal steps.
                if thread == 0:
                    if total_step_1 % 200 == 0:
                        self.replay_1()
                    total_step_1 += 1
                elif thread == 1:
                    if total_step_2 % 201 == 0:
                        self.replay_2()
                    total_step_2 += 1

            # Episode bookkeeping is shared between the workers.
            with self.lock:
                average = self.PlotModel(score, self.episode)
                if average >= self.max_average:
                    # New best running average (model saving is disabled).
                    self.max_average = average
                    SAVING = "SAVING"
                else:
                    SAVING = ""

                print("total_step_1: ", total_step_1)
                print("total_step_2: ", total_step_2)
                print("episode: {}/{}, thread: {}, score: {}, average: {:.2f} {}".format(self.episode, self.EPISODES, thread, score, average, SAVING))
                if self.episode < self.EPISODES:
                    self.episode += 1
                 
    def test(self, Actor_name, Critic_name=None):
        """Load a saved model and play 100 rendered episodes greedily.

        In every step the action with the largest policy logit is chosen.
        The score of each finished episode is printed, and the agent's
        environment is closed at the end.

        Args:
            Actor_name: Path of the saved model, passed to ``load``.
            Critic_name: Ignored. The agent uses a single combined network;
                the parameter is kept so existing two-argument calls still
                work.
        """
        self.load(Actor_name)

        test_episodes = 100
        for e in range(test_episodes):
            state = self.reset(self.env)
            done = False
            score = 0
            while not done:
                self.env.render()
                # The first model output holds the policy logits.
                action_logits = self.model(state, training=False)[0]
                action = int(np.argmax(action_logits))
                state, reward, done, _ = self.step(action, self.env)
                score += reward
            print("episode: {}/{}, score: {}".format(e, test_episodes, score))

        self.env.close()


# Script entry point: build an agent for 'Pong-v0' and train it with two
# worker threads.
if __name__ == "__main__":
    env_name = 'Pong-v0'
    agent = IMPALA_Agent(env_name)
    
    # Disabled: IMPALA_Agent defines no run() method in this file.
    #agent.run() # use as IMPALA
    agent.train(n_threads=2) # use as IMPALA
    # Disabled: evaluation of a previously saved model.
    #agent.test('Models/Pong-v0_A3C_2.5e-05_Actor.h5', '')

total_step_1:  1286
total_step_2:  0
episode: 0/2000000, thread: 0, score: -20.0, average: -20.00 SAVING
total_step_1:  0
total_step_2:  1220
episode: 1/2000000, thread: 1, score: -21.0, average: -20.50 
total_step_1:  2457
total_step_2:  0
episode: 2/2000000, thread: 0, score: -21.0, average: -20.67 
total_step_1:  0
total_step_2:  2539
episode: 3/2000000, thread: 1, score: -20.0, average: -20.50 
total_step_1:  3716
total_step_2:  0
episode: 4/2000000, thread: 0, score: -20.0, average: -20.40 
total_step_1:  0
total_step_2:  3634
episode: 5/2000000, thread: 1, score: -21.0, average: -20.50 
total_step_1:  0
total_step_2:  4676
episode: 6/2000000, thread: 1, score: -21.0, average: -20.57 
total_step_1:  4973
total_step_2:  0
episode: 7/2000000, thread: 0, score: -21.0, average: -20.62 
total_step_1:  6002
total_step_2:  0
episode: 8/2000000, thread: 0, score: -21.0, average: -20.67 
total_step_1:  0
total_step_2:  5986
episode: 9/2000000, thread: 1, score: -20.0, average: -20.60 
tota

total_step_1:  50862
total_step_2:  0
episode: 82/2000000, thread: 0, score: -20.0, average: -20.36 
total_step_1:  0
total_step_2:  51620
episode: 83/2000000, thread: 1, score: -20.0, average: -20.36 
total_step_1:  52068
total_step_2:  0
episode: 84/2000000, thread: 0, score: -20.0, average: -20.36 
total_step_1:  0
total_step_2:  52799
episode: 85/2000000, thread: 1, score: -21.0, average: -20.36 
total_step_1:  53201
total_step_2:  0
episode: 86/2000000, thread: 0, score: -21.0, average: -20.40 
total_step_1:  0
total_step_2:  54116
episode: 87/2000000, thread: 1, score: -21.0, average: -20.40 
total_step_1:  54457
total_step_2:  0
episode: 88/2000000, thread: 0, score: -20.0, average: -20.38 
total_step_1:  0
total_step_2:  55146
episode: 89/2000000, thread: 1, score: -21.0, average: -20.40 
total_step_1:  55653
total_step_2:  0
episode: 90/2000000, thread: 0, score: -20.0, average: -20.44 
total_step_1:  0
total_step_2:  56464
episode: 91/2000000, thread: 1, score: -19.0, average

total_step_1:  0
total_step_2:  101965
episode: 162/2000000, thread: 1, score: -21.0, average: -19.94 
total_step_1:  102796
total_step_2:  0
episode: 163/2000000, thread: 0, score: -21.0, average: -19.96 
total_step_1:  0
total_step_2:  103208
episode: 164/2000000, thread: 1, score: -20.0, average: -19.96 
total_step_1:  104009
total_step_2:  0
episode: 165/2000000, thread: 0, score: -20.0, average: -19.94 
total_step_1:  0
total_step_2:  104545
episode: 166/2000000, thread: 1, score: -21.0, average: -19.94 
total_step_1:  105233
total_step_2:  0
episode: 167/2000000, thread: 0, score: -20.0, average: -19.94 
total_step_1:  0
total_step_2:  105881
episode: 168/2000000, thread: 1, score: -20.0, average: -19.98 
total_step_1:  106325
total_step_2:  0
episode: 169/2000000, thread: 0, score: -21.0, average: -19.98 
total_step_1:  0
total_step_2:  107035
episode: 170/2000000, thread: 1, score: -20.0, average: -19.96 
total_step_1:  107720
total_step_2:  0
episode: 171/2000000, thread: 0, s

total_step_1:  154733
total_step_2:  0
episode: 242/2000000, thread: 0, score: -19.0, average: -19.98 
total_step_1:  0
total_step_2:  155845
episode: 243/2000000, thread: 1, score: -21.0, average: -20.02 
total_step_1:  156500
total_step_2:  0
episode: 244/2000000, thread: 0, score: -18.0, average: -19.96 
total_step_1:  0
total_step_2:  157216
episode: 245/2000000, thread: 1, score: -18.0, average: -19.92 
total_step_1:  157949
total_step_2:  0
episode: 246/2000000, thread: 0, score: -18.0, average: -19.88 
total_step_1:  0
total_step_2:  158603
episode: 247/2000000, thread: 1, score: -20.0, average: -19.86 SAVING
total_step_1:  159212
total_step_2:  0
episode: 248/2000000, thread: 0, score: -19.0, average: -19.82 SAVING
total_step_1:  0
total_step_2:  159849
episode: 249/2000000, thread: 1, score: -21.0, average: -19.84 
total_step_1:  160424
total_step_2:  0
episode: 250/2000000, thread: 0, score: -20.0, average: -19.84 
total_step_1:  0
total_step_2:  161321
episode: 251/2000000, 

total_step_1:  0
total_step_2:  209851
episode: 320/2000000, thread: 1, score: -21.0, average: -19.54 
total_step_1:  210655
total_step_2:  0
episode: 321/2000000, thread: 0, score: -19.0, average: -19.50 
total_step_1:  0
total_step_2:  211454
episode: 322/2000000, thread: 1, score: -20.0, average: -19.50 
total_step_1:  212001
total_step_2:  0
episode: 323/2000000, thread: 0, score: -20.0, average: -19.52 
total_step_1:  0
total_step_2:  212645
episode: 324/2000000, thread: 1, score: -21.0, average: -19.58 
total_step_1:  213536
total_step_2:  0
episode: 325/2000000, thread: 0, score: -18.0, average: -19.56 
total_step_1:  0
total_step_2:  214038
episode: 326/2000000, thread: 1, score: -19.0, average: -19.54 
total_step_1:  214919
total_step_2:  0
episode: 327/2000000, thread: 0, score: -20.0, average: -19.56 
total_step_1:  0
total_step_2:  215497
episode: 328/2000000, thread: 1, score: -20.0, average: -19.56 
total_step_1:  216220
total_step_2:  0
episode: 329/2000000, thread: 0, s

total_step_1:  270929
total_step_2:  0
episode: 399/2000000, thread: 0, score: -20.0, average: -19.02 
total_step_1:  0
total_step_2:  272467
episode: 400/2000000, thread: 1, score: -20.0, average: -19.06 
total_step_1:  272226
total_step_2:  0
episode: 401/2000000, thread: 0, score: -21.0, average: -19.10 
total_step_1:  0
total_step_2:  273889
episode: 402/2000000, thread: 1, score: -20.0, average: -19.12 
total_step_1:  273747
total_step_2:  0
episode: 403/2000000, thread: 0, score: -20.0, average: -19.14 
total_step_1:  0
total_step_2:  275500
episode: 404/2000000, thread: 1, score: -18.0, average: -19.12 
total_step_1:  275360
total_step_2:  0
episode: 405/2000000, thread: 0, score: -18.0, average: -19.10 
total_step_1:  0
total_step_2:  276935
episode: 406/2000000, thread: 1, score: -21.0, average: -19.12 
total_step_1:  277153
total_step_2:  0
episode: 407/2000000, thread: 0, score: -19.0, average: -19.14 
total_step_1:  0
total_step_2:  278328
episode: 408/2000000, thread: 1, s

total_step_1:  0
total_step_2:  339666
episode: 479/2000000, thread: 1, score: -17.0, average: -19.14 
total_step_1:  339264
total_step_2:  0
episode: 480/2000000, thread: 0, score: -17.0, average: -19.10 
total_step_1:  0
total_step_2:  341363
episode: 481/2000000, thread: 1, score: -18.0, average: -19.08 
total_step_1:  340944
total_step_2:  0
episode: 482/2000000, thread: 0, score: -20.0, average: -19.12 
total_step_1:  0
total_step_2:  342926
episode: 483/2000000, thread: 1, score: -20.0, average: -19.14 
total_step_1:  342983
total_step_2:  0
episode: 484/2000000, thread: 0, score: -19.0, average: -19.18 
total_step_1:  0
total_step_2:  344643
episode: 485/2000000, thread: 1, score: -20.0, average: -19.16 
total_step_1:  344784
total_step_2:  0
episode: 486/2000000, thread: 0, score: -19.0, average: -19.12 
total_step_1:  0
total_step_2:  346381
episode: 487/2000000, thread: 1, score: -20.0, average: -19.12 
total_step_1:  346238
total_step_2:  0
episode: 488/2000000, thread: 0, s

total_step_1:  410040
total_step_2:  0
episode: 559/2000000, thread: 0, score: -21.0, average: -19.30 
total_step_1:  411685
total_step_2:  0
episode: 560/2000000, thread: 0, score: -19.0, average: -19.32 
total_step_1:  0
total_step_2:  413225
episode: 561/2000000, thread: 1, score: -19.0, average: -19.30 
total_step_1:  413347
total_step_2:  0
episode: 562/2000000, thread: 0, score: -20.0, average: -19.32 
total_step_1:  0
total_step_2:  414926
episode: 563/2000000, thread: 1, score: -19.0, average: -19.34 
total_step_1:  0
total_step_2:  416632
episode: 564/2000000, thread: 1, score: -20.0, average: -19.32 
total_step_1:  415524
total_step_2:  0
episode: 565/2000000, thread: 0, score: -17.0, average: -19.24 
total_step_1:  0
total_step_2:  418247
episode: 566/2000000, thread: 1, score: -21.0, average: -19.26 
total_step_1:  417624
total_step_2:  0
episode: 567/2000000, thread: 0, score: -21.0, average: -19.32 
total_step_1:  0
total_step_2:  420142
episode: 568/2000000, thread: 1, s

total_step_1:  487684
total_step_2:  0
episode: 639/2000000, thread: 0, score: -20.0, average: -18.80 
total_step_1:  0
total_step_2:  490696
episode: 640/2000000, thread: 1, score: -18.0, average: -18.80 
total_step_1:  490270
total_step_2:  0
episode: 641/2000000, thread: 0, score: -17.0, average: -18.72 SAVING
total_step_1:  0
total_step_2:  492767
episode: 642/2000000, thread: 1, score: -18.0, average: -18.76 
total_step_1:  492336
total_step_2:  0
episode: 643/2000000, thread: 0, score: -18.0, average: -18.74 
total_step_1:  0
total_step_2:  495276
episode: 644/2000000, thread: 1, score: -17.0, average: -18.70 SAVING
total_step_1:  494546
total_step_2:  0
episode: 645/2000000, thread: 0, score: -19.0, average: -18.70 SAVING
total_step_1:  0
total_step_2:  497630
episode: 646/2000000, thread: 1, score: -18.0, average: -18.66 SAVING
total_step_1:  496264
total_step_2:  0
episode: 647/2000000, thread: 0, score: -19.0, average: -18.72 
total_step_1:  0
total_step_2:  499675
episode: 6

total_step_1:  0
total_step_2:  572549
episode: 718/2000000, thread: 1, score: -20.0, average: -18.74 
total_step_1:  570684
total_step_2:  0
episode: 719/2000000, thread: 0, score: -16.0, average: -18.68 
total_step_1:  572865
total_step_2:  0
episode: 720/2000000, thread: 0, score: -21.0, average: -18.74 
total_step_1:  0
total_step_2:  574796
episode: 721/2000000, thread: 1, score: -21.0, average: -18.84 
total_step_1:  575157
total_step_2:  0
episode: 722/2000000, thread: 0, score: -19.0, average: -18.80 
total_step_1:  0
total_step_2:  577118
episode: 723/2000000, thread: 1, score: -18.0, average: -18.76 
total_step_1:  577575
total_step_2:  0
episode: 724/2000000, thread: 0, score: -20.0, average: -18.86 
total_step_1:  0
total_step_2:  579821
episode: 725/2000000, thread: 1, score: -18.0, average: -18.86 
total_step_1:  579815
total_step_2:  0
episode: 726/2000000, thread: 0, score: -19.0, average: -18.90 
total_step_1:  0
total_step_2:  581966
episode: 727/2000000, thread: 1, s

total_step_1:  0
total_step_2:  660366
episode: 798/2000000, thread: 1, score: -19.0, average: -18.92 
total_step_1:  659634
total_step_2:  0
episode: 799/2000000, thread: 0, score: -17.0, average: -18.90 
total_step_1:  0
total_step_2:  662705
episode: 800/2000000, thread: 1, score: -21.0, average: -18.90 
total_step_1:  661509
total_step_2:  0
episode: 801/2000000, thread: 0, score: -20.0, average: -18.90 
total_step_1:  0
total_step_2:  665390
episode: 802/2000000, thread: 1, score: -18.0, average: -18.92 
total_step_1:  663793
total_step_2:  0
episode: 803/2000000, thread: 0, score: -17.0, average: -18.88 
total_step_1:  0
total_step_2:  667600
episode: 804/2000000, thread: 1, score: -20.0, average: -18.98 
total_step_1:  666146
total_step_2:  0
episode: 805/2000000, thread: 0, score: -20.0, average: -19.00 
total_step_1:  0
total_step_2:  669670
episode: 806/2000000, thread: 1, score: -21.0, average: -19.02 
total_step_1:  668540
total_step_2:  0
episode: 807/2000000, thread: 0, s

total_step_1:  746808
total_step_2:  0
episode: 878/2000000, thread: 0, score: -19.0, average: -18.88 
total_step_1:  0
total_step_2:  750569
episode: 879/2000000, thread: 1, score: -18.0, average: -18.92 
total_step_1:  749010
total_step_2:  0
episode: 880/2000000, thread: 0, score: -18.0, average: -18.86 
total_step_1:  0
total_step_2:  752944
episode: 881/2000000, thread: 1, score: -18.0, average: -18.84 
total_step_1:  751226
total_step_2:  0
episode: 882/2000000, thread: 0, score: -18.0, average: -18.84 
total_step_1:  0
total_step_2:  754627
episode: 883/2000000, thread: 1, score: -21.0, average: -18.86 
total_step_1:  752693
total_step_2:  0
episode: 884/2000000, thread: 0, score: -21.0, average: -18.90 
total_step_1:  0
total_step_2:  756663
episode: 885/2000000, thread: 1, score: -17.0, average: -18.88 
total_step_1:  754341
total_step_2:  0
episode: 886/2000000, thread: 0, score: -20.0, average: -18.86 
total_step_1:  0
total_step_2:  758754
episode: 887/2000000, thread: 1, s

total_step_1:  0
total_step_2:  838270
episode: 959/2000000, thread: 1, score: -19.0, average: -18.48 
total_step_1:  835858
total_step_2:  0
episode: 960/2000000, thread: 0, score: -16.0, average: -18.46 
total_step_1:  838108
total_step_2:  0
episode: 961/2000000, thread: 0, score: -17.0, average: -18.42 
total_step_1:  0
total_step_2:  840996
episode: 962/2000000, thread: 1, score: -17.0, average: -18.38 
total_step_1:  0
total_step_2:  843261
episode: 963/2000000, thread: 1, score: -20.0, average: -18.44 
total_step_1:  840607
total_step_2:  0
episode: 964/2000000, thread: 0, score: -13.0, average: -18.36 
total_step_1:  0
total_step_2:  845700
episode: 965/2000000, thread: 1, score: -18.0, average: -18.36 
total_step_1:  843157
total_step_2:  0
episode: 966/2000000, thread: 0, score: -19.0, average: -18.32 
total_step_1:  845343
total_step_2:  0
episode: 967/2000000, thread: 0, score: -18.0, average: -18.30 SAVING
total_step_1:  0
total_step_2:  848155
episode: 968/2000000, thread

total_step_1:  930303
total_step_2:  0
episode: 1037/2000000, thread: 0, score: -18.0, average: -17.90 
total_step_1:  0
total_step_2:  935561
episode: 1038/2000000, thread: 1, score: -17.0, average: -17.92 
total_step_1:  933129
total_step_2:  0
episode: 1039/2000000, thread: 0, score: -14.0, average: -17.82 
total_step_1:  0
total_step_2:  938111
episode: 1040/2000000, thread: 1, score: -16.0, average: -17.80 
total_step_1:  935391
total_step_2:  0
episode: 1041/2000000, thread: 0, score: -19.0, average: -17.84 
total_step_1:  0
total_step_2:  940483
episode: 1042/2000000, thread: 1, score: -17.0, average: -17.76 SAVING
total_step_1:  938104
total_step_2:  0
episode: 1043/2000000, thread: 0, score: -18.0, average: -17.78 
total_step_1:  0
total_step_2:  942514
episode: 1044/2000000, thread: 1, score: -19.0, average: -17.80 
total_step_1:  940644
total_step_2:  0
episode: 1045/2000000, thread: 0, score: -17.0, average: -17.76 SAVING
total_step_1:  0
total_step_2:  945000
episode: 1046

total_step_1:  0
total_step_2:  1026934
episode: 1115/2000000, thread: 1, score: -14.0, average: -17.28 SAVING
total_step_1:  1024660
total_step_2:  0
episode: 1116/2000000, thread: 0, score: -16.0, average: -17.24 SAVING
total_step_1:  0
total_step_2:  1029015
episode: 1117/2000000, thread: 1, score: -16.0, average: -17.16 SAVING
total_step_1:  1026933
total_step_2:  0
episode: 1118/2000000, thread: 0, score: -17.0, average: -17.18 
total_step_1:  0
total_step_2:  1031187
episode: 1119/2000000, thread: 1, score: -18.0, average: -17.12 SAVING
total_step_1:  1028684
total_step_2:  0
episode: 1120/2000000, thread: 0, score: -19.0, average: -17.16 
total_step_1:  0
total_step_2:  1033777
episode: 1121/2000000, thread: 1, score: -17.0, average: -17.16 
total_step_1:  1030997
total_step_2:  0
episode: 1122/2000000, thread: 0, score: -15.0, average: -17.08 SAVING
total_step_1:  1032965
total_step_2:  0
episode: 1123/2000000, thread: 0, score: -19.0, average: -17.06 SAVING
total_step_1:  0
to

total_step_1:  1114032
total_step_2:  0
episode: 1193/2000000, thread: 0, score: -18.0, average: -16.42 SAVING
total_step_1:  0
total_step_2:  1117350
episode: 1194/2000000, thread: 1, score: -18.0, average: -16.44 
total_step_1:  0
total_step_2:  1119314
episode: 1195/2000000, thread: 1, score: -17.0, average: -16.50 
total_step_1:  1116181
total_step_2:  0
episode: 1196/2000000, thread: 0, score: -18.0, average: -16.58 
total_step_1:  1118699
total_step_2:  0
episode: 1197/2000000, thread: 0, score: -17.0, average: -16.58 
total_step_1:  0
total_step_2:  1121958
episode: 1198/2000000, thread: 1, score: -17.0, average: -16.56 
total_step_1:  1120856
total_step_2:  0
episode: 1199/2000000, thread: 0, score: -16.0, average: -16.48 
total_step_1:  0
total_step_2:  1124531
episode: 1200/2000000, thread: 1, score: -14.0, average: -16.42 SAVING
total_step_1:  0
total_step_2:  1126444
episode: 1201/2000000, thread: 1, score: -18.0, average: -16.46 
total_step_1:  1123411
total_step_2:  0
epi

total_step_1:  1205812
total_step_2:  0
episode: 1271/2000000, thread: 0, score: -18.0, average: -16.30 
total_step_1:  0
total_step_2:  1210578
episode: 1272/2000000, thread: 1, score: -16.0, average: -16.30 
total_step_1:  1207972
total_step_2:  0
episode: 1273/2000000, thread: 0, score: -17.0, average: -16.28 
total_step_1:  0
total_step_2:  1212895
episode: 1274/2000000, thread: 1, score: -17.0, average: -16.42 
total_step_1:  1209996
total_step_2:  0
episode: 1275/2000000, thread: 0, score: -18.0, average: -16.42 
total_step_1:  0
total_step_2:  1215069
episode: 1276/2000000, thread: 1, score: -17.0, average: -16.38 
total_step_1:  1211903
total_step_2:  0
episode: 1277/2000000, thread: 0, score: -19.0, average: -16.48 
total_step_1:  0
total_step_2:  1217291
episode: 1278/2000000, thread: 1, score: -16.0, average: -16.44 
total_step_1:  1214588
total_step_2:  0
episode: 1279/2000000, thread: 0, score: -14.0, average: -16.38 
total_step_1:  0
total_step_2:  1219875
episode: 1280/2

total_step_1:  0
total_step_2:  1306908
episode: 1349/2000000, thread: 1, score: -16.0, average: -15.38 
total_step_1:  1303062
total_step_2:  0
episode: 1350/2000000, thread: 0, score: -15.0, average: -15.36 
total_step_1:  0
total_step_2:  1309575
episode: 1351/2000000, thread: 1, score: -14.0, average: -15.36 
total_step_1:  1306070
total_step_2:  0
episode: 1352/2000000, thread: 0, score: -14.0, average: -15.32 
total_step_1:  0
total_step_2:  1312356
episode: 1353/2000000, thread: 1, score: -16.0, average: -15.32 
total_step_1:  1308439
total_step_2:  0
episode: 1354/2000000, thread: 0, score: -18.0, average: -15.34 
total_step_1:  0
total_step_2:  1314819
episode: 1355/2000000, thread: 1, score: -14.0, average: -15.34 
total_step_1:  1310993
total_step_2:  0
episode: 1356/2000000, thread: 0, score: -16.0, average: -15.36 
total_step_1:  1313362
total_step_2:  0
episode: 1357/2000000, thread: 0, score: -17.0, average: -15.42 
total_step_1:  0
total_step_2:  1317821
episode: 1358/2

total_step_1:  1403568
total_step_2:  0
episode: 1427/2000000, thread: 0, score: -13.0, average: -14.94 SAVING
total_step_1:  0
total_step_2:  1409243
episode: 1428/2000000, thread: 1, score: -16.0, average: -15.00 
total_step_1:  1405906
total_step_2:  0
episode: 1429/2000000, thread: 0, score: -16.0, average: -15.00 
total_step_1:  0
total_step_2:  1412093
episode: 1430/2000000, thread: 1, score: -11.0, average: -14.94 SAVING
total_step_1:  1408540
total_step_2:  0
episode: 1431/2000000, thread: 0, score: -12.0, average: -14.90 SAVING
total_step_1:  0
total_step_2:  1414515
episode: 1432/2000000, thread: 1, score: -15.0, average: -14.90 SAVING
total_step_1:  1411225
total_step_2:  0
episode: 1433/2000000, thread: 0, score: -12.0, average: -14.82 SAVING
total_step_1:  0
total_step_2:  1417033
episode: 1434/2000000, thread: 1, score: -16.0, average: -14.74 SAVING
total_step_1:  1413942
total_step_2:  0
episode: 1435/2000000, thread: 0, score: -14.0, average: -14.64 SAVING
total_step_1:

total_step_1:  0
total_step_2:  1507825
episode: 1504/2000000, thread: 1, score: -17.0, average: -14.46 
total_step_1:  0
total_step_2:  1510402
episode: 1505/2000000, thread: 1, score: -15.0, average: -14.48 
total_step_1:  1506128
total_step_2:  0
episode: 1506/2000000, thread: 0, score: -9.0, average: -14.44 
total_step_1:  0
total_step_2:  1512653
episode: 1507/2000000, thread: 1, score: -16.0, average: -14.46 
total_step_1:  1508497
total_step_2:  0
episode: 1508/2000000, thread: 0, score: -15.0, average: -14.48 
total_step_1:  0
total_step_2:  1515068
episode: 1509/2000000, thread: 1, score: -15.0, average: -14.46 
total_step_1:  1511134
total_step_2:  0
episode: 1510/2000000, thread: 0, score: -14.0, average: -14.48 
total_step_1:  0
total_step_2:  1517350
episode: 1511/2000000, thread: 1, score: -17.0, average: -14.58 
total_step_1:  1513505
total_step_2:  0
episode: 1512/2000000, thread: 0, score: -18.0, average: -14.60 
total_step_1:  0
total_step_2:  1519607
episode: 1513/20

total_step_1:  0
total_step_2:  1617894
episode: 1582/2000000, thread: 1, score: -12.0, average: -12.98 
total_step_1:  1613964
total_step_2:  0
episode: 1583/2000000, thread: 0, score: -13.0, average: -12.98 
total_step_1:  0
total_step_2:  1620445
episode: 1584/2000000, thread: 1, score: -15.0, average: -13.16 
total_step_1:  1617579
total_step_2:  0
episode: 1585/2000000, thread: 0, score: -7.0, average: -13.02 
total_step_1:  0
total_step_2:  1623257
episode: 1586/2000000, thread: 1, score: -14.0, average: -13.00 
total_step_1:  1620106
total_step_2:  0
episode: 1587/2000000, thread: 0, score: -16.0, average: -13.06 
total_step_1:  0
total_step_2:  1625830
episode: 1588/2000000, thread: 1, score: -16.0, average: -13.20 
total_step_1:  1623425
total_step_2:  0
episode: 1589/2000000, thread: 0, score: -11.0, average: -13.24 
total_step_1:  0
total_step_2:  1629050
episode: 1590/2000000, thread: 1, score: -12.0, average: -13.18 
total_step_1:  1626584
total_step_2:  0
episode: 1591/20

total_step_1:  1727173
total_step_2:  0
episode: 1660/2000000, thread: 0, score: -19.0, average: -13.48 
total_step_1:  0
total_step_2:  1732744
episode: 1661/2000000, thread: 1, score: -12.0, average: -13.46 
total_step_1:  1729683
total_step_2:  0
episode: 1662/2000000, thread: 0, score: -16.0, average: -13.46 
total_step_1:  0
total_step_2:  1735116
episode: 1663/2000000, thread: 1, score: -16.0, average: -13.42 
total_step_1:  1732255
total_step_2:  0
episode: 1664/2000000, thread: 0, score: -15.0, average: -13.52 
total_step_1:  0
total_step_2:  1737899
episode: 1665/2000000, thread: 1, score: -14.0, average: -13.54 
total_step_1:  1735226
total_step_2:  0
episode: 1666/2000000, thread: 0, score: -14.0, average: -13.58 
total_step_1:  0
total_step_2:  1740725
episode: 1667/2000000, thread: 1, score: -13.0, average: -13.56 
total_step_1:  0
total_step_2:  1743356
episode: 1668/2000000, thread: 1, score: -17.0, average: -13.64 
total_step_1:  1738319
total_step_2:  0
episode: 1669/2

total_step_1:  0
total_step_2:  1846074
episode: 1739/2000000, thread: 1, score: -18.0, average: -13.42 
total_step_1:  1841705
total_step_2:  0
episode: 1740/2000000, thread: 0, score: -12.0, average: -13.42 
total_step_1:  0
total_step_2:  1849525
episode: 1741/2000000, thread: 1, score: -9.0, average: -13.26 
total_step_1:  1844532
total_step_2:  0
episode: 1742/2000000, thread: 0, score: -13.0, average: -13.20 
total_step_1:  0
total_step_2:  1852459
episode: 1743/2000000, thread: 1, score: -12.0, average: -13.12 
total_step_1:  1847632
total_step_2:  0
episode: 1744/2000000, thread: 0, score: -12.0, average: -13.14 
total_step_1:  0
total_step_2:  1855505
episode: 1745/2000000, thread: 1, score: -11.0, average: -13.08 
total_step_1:  1850327
total_step_2:  0
episode: 1746/2000000, thread: 0, score: -14.0, average: -13.00 
total_step_1:  1853329
total_step_2:  0
episode: 1747/2000000, thread: 0, score: -9.0, average: -12.98 
total_step_1:  0
total_step_2:  1859069
episode: 1748/200

total_step_1:  0
total_step_2:  1964151
episode: 1818/2000000, thread: 1, score: -13.0, average: -12.88 
total_step_1:  1959333
total_step_2:  0
episode: 1819/2000000, thread: 0, score: -14.0, average: -12.84 
total_step_1:  0
total_step_2:  1967828
episode: 1820/2000000, thread: 1, score: -6.0, average: -12.84 
total_step_1:  1963148
total_step_2:  0
episode: 1821/2000000, thread: 0, score: -6.0, average: -12.82 
total_step_1:  0
total_step_2:  1970388
episode: 1822/2000000, thread: 1, score: -17.0, average: -12.84 
total_step_1:  1966371
total_step_2:  0
episode: 1823/2000000, thread: 0, score: -10.0, average: -12.68 
total_step_1:  0
total_step_2:  1972861
episode: 1824/2000000, thread: 1, score: -16.0, average: -12.66 
total_step_1:  1968544
total_step_2:  0
episode: 1825/2000000, thread: 0, score: -16.0, average: -12.64 
total_step_1:  0
total_step_2:  1976139
episode: 1826/2000000, thread: 1, score: -11.0, average: -12.58 
total_step_1:  1972038
total_step_2:  0
episode: 1827/200

total_step_1:  0
total_step_2:  2083416
episode: 1896/2000000, thread: 1, score: -11.0, average: -12.24 
total_step_1:  0
total_step_2:  2085324
episode: 1897/2000000, thread: 1, score: -18.0, average: -12.38 
total_step_1:  2079634
total_step_2:  0
episode: 1898/2000000, thread: 0, score: -13.0, average: -12.34 
total_step_1:  2082927
total_step_2:  0
episode: 1899/2000000, thread: 0, score: -13.0, average: -12.26 
total_step_1:  0
total_step_2:  2089404
episode: 1900/2000000, thread: 1, score: -10.0, average: -12.30 
total_step_1:  2085693
total_step_2:  0
episode: 1901/2000000, thread: 0, score: -14.0, average: -12.26 
total_step_1:  0
total_step_2:  2092313
episode: 1902/2000000, thread: 1, score: -9.0, average: -12.34 
total_step_1:  2089132
total_step_2:  0
episode: 1903/2000000, thread: 0, score: -9.0, average: -12.28 
total_step_1:  0
total_step_2:  2095242
episode: 1904/2000000, thread: 1, score: -16.0, average: -12.36 
total_step_1:  0
total_step_2:  2097925
episode: 1905/200

total_step_1:  0
total_step_2:  2205617
episode: 1974/2000000, thread: 1, score: -14.0, average: -11.98 
total_step_1:  2199914
total_step_2:  0
episode: 1975/2000000, thread: 0, score: -14.0, average: -12.12 
total_step_1:  2202896
total_step_2:  0
episode: 1976/2000000, thread: 0, score: -10.0, average: -12.16 
total_step_1:  0
total_step_2:  2209255
episode: 1977/2000000, thread: 1, score: -12.0, average: -12.14 
total_step_1:  0
total_step_2:  2212099
episode: 1978/2000000, thread: 1, score: -13.0, average: -12.28 
total_step_1:  2206313
total_step_2:  0
episode: 1979/2000000, thread: 0, score: -9.0, average: -12.26 
total_step_1:  0
total_step_2:  2215421
episode: 1980/2000000, thread: 1, score: -10.0, average: -12.24 
total_step_1:  2209720
total_step_2:  0
episode: 1981/2000000, thread: 0, score: -9.0, average: -12.16 
total_step_1:  2212129
total_step_2:  0
episode: 1982/2000000, thread: 0, score: -16.0, average: -12.18 
total_step_1:  0
total_step_2:  2219105
episode: 1983/200

total_step_1:  0
total_step_2:  2329504
episode: 2052/2000000, thread: 1, score: -6.0, average: -11.48 
total_step_1:  2323801
total_step_2:  0
episode: 2053/2000000, thread: 0, score: -9.0, average: -11.44 
total_step_1:  0
total_step_2:  2332277
episode: 2054/2000000, thread: 1, score: -11.0, average: -11.42 
total_step_1:  2327276
total_step_2:  0
episode: 2055/2000000, thread: 0, score: -9.0, average: -11.28 
total_step_1:  0
total_step_2:  2335501
episode: 2056/2000000, thread: 1, score: -10.0, average: -11.28 
total_step_1:  2330211
total_step_2:  0
episode: 2057/2000000, thread: 0, score: -14.0, average: -11.26 
total_step_1:  0
total_step_2:  2338461
episode: 2058/2000000, thread: 1, score: -12.0, average: -11.24 
total_step_1:  2333953
total_step_2:  0
episode: 2059/2000000, thread: 0, score: -8.0, average: -11.22 
total_step_1:  0
total_step_2:  2341499
episode: 2060/2000000, thread: 1, score: -12.0, average: -11.22 
total_step_1:  2336567
total_step_2:  0
episode: 2061/20000

total_step_1:  0
total_step_2:  2454727
episode: 2130/2000000, thread: 1, score: -11.0, average: -11.86 
total_step_1:  2448293
total_step_2:  0
episode: 2131/2000000, thread: 0, score: -4.0, average: -11.68 
total_step_1:  2450555
total_step_2:  0
episode: 2132/2000000, thread: 0, score: -14.0, average: -11.64 
total_step_1:  0
total_step_2:  2458031
episode: 2133/2000000, thread: 1, score: -12.0, average: -11.70 
total_step_1:  0
total_step_2:  2460722
episode: 2134/2000000, thread: 1, score: -11.0, average: -11.72 
total_step_1:  2453741
total_step_2:  0
episode: 2135/2000000, thread: 0, score: -11.0, average: -11.78 
total_step_1:  2456108
total_step_2:  0
episode: 2136/2000000, thread: 0, score: -13.0, average: -11.76 
total_step_1:  0
total_step_2:  2463722
episode: 2137/2000000, thread: 1, score: -10.0, average: -11.86 
total_step_1:  2458841
total_step_2:  0
episode: 2138/2000000, thread: 0, score: -14.0, average: -11.92 
total_step_1:  0
total_step_2:  2466692
episode: 2139/20

total_step_1:  2570423
total_step_2:  0
episode: 2209/2000000, thread: 0, score: -10.0, average: -11.24 
total_step_1:  2573371
total_step_2:  0
episode: 2210/2000000, thread: 0, score: -15.0, average: -11.32 
total_step_1:  0
total_step_2:  2581133
episode: 2211/2000000, thread: 1, score: -6.0, average: -11.28 
total_step_1:  0
total_step_2:  2583777
episode: 2212/2000000, thread: 1, score: -16.0, average: -11.42 
total_step_1:  2577306
total_step_2:  0
episode: 2213/2000000, thread: 0, score: -6.0, average: -11.26 
total_step_1:  0
total_step_2:  2587696
episode: 2214/2000000, thread: 1, score: -6.0, average: -11.12 
total_step_1:  2581619
total_step_2:  0
episode: 2215/2000000, thread: 0, score: -7.0, average: -10.98 
total_step_1:  0
total_step_2:  2590530
episode: 2216/2000000, thread: 1, score: -13.0, average: -10.96 
total_step_1:  2585749
total_step_2:  0
episode: 2217/2000000, thread: 0, score: -6.0, average: -10.84 
total_step_1:  0
total_step_2:  2594819
episode: 2218/200000

total_step_1:  0
total_step_2:  2715278
episode: 2287/2000000, thread: 1, score: -10.0, average: -11.10 
total_step_1:  2707314
total_step_2:  0
episode: 2288/2000000, thread: 0, score: -14.0, average: -11.14 
total_step_1:  0
total_step_2:  2718152
episode: 2289/2000000, thread: 1, score: -15.0, average: -11.22 
total_step_1:  2710322
total_step_2:  0
episode: 2290/2000000, thread: 0, score: -15.0, average: -11.34 
total_step_1:  0
total_step_2:  2721687
episode: 2291/2000000, thread: 1, score: -11.0, average: -11.32 
total_step_1:  2713795
total_step_2:  0
episode: 2292/2000000, thread: 0, score: -9.0, average: -11.28 
total_step_1:  0
total_step_2:  2724766
episode: 2293/2000000, thread: 1, score: -13.0, average: -11.30 
total_step_1:  2717895
total_step_2:  0
episode: 2294/2000000, thread: 0, score: -8.0, average: -11.30 
total_step_1:  0
total_step_2:  2728721
episode: 2295/2000000, thread: 1, score: -11.0, average: -11.34 
total_step_1:  2721524
total_step_2:  0
episode: 2296/200

total_step_1:  2840348
total_step_2:  0
episode: 2366/2000000, thread: 0, score: -15.0, average: -11.26 
total_step_1:  0
total_step_2:  2849043
episode: 2367/2000000, thread: 1, score: -16.0, average: -11.26 
total_step_1:  2843165
total_step_2:  0
episode: 2368/2000000, thread: 0, score: -16.0, average: -11.36 
total_step_1:  0
total_step_2:  2852129
episode: 2369/2000000, thread: 1, score: -8.0, average: -11.26 
total_step_1:  2846554
total_step_2:  0
episode: 2370/2000000, thread: 0, score: -8.0, average: -11.24 
total_step_1:  0
total_step_2:  2855690
episode: 2371/2000000, thread: 1, score: -12.0, average: -11.14 
total_step_1:  2850399
total_step_2:  0
episode: 2372/2000000, thread: 0, score: -5.0, average: -11.10 
total_step_1:  0
total_step_2:  2859119
episode: 2373/2000000, thread: 1, score: -9.0, average: -11.08 
total_step_1:  2853480
total_step_2:  0
episode: 2374/2000000, thread: 0, score: -15.0, average: -11.04 
total_step_1:  0
total_step_2:  2863463
episode: 2375/20000

total_step_1:  0
total_step_2:  2984521
episode: 2444/2000000, thread: 1, score: -1.0, average: -10.12 
total_step_1:  2979254
total_step_2:  0
episode: 2445/2000000, thread: 0, score: -6.0, average: -10.08 
total_step_1:  0
total_step_2:  2988101
episode: 2446/2000000, thread: 1, score: -11.0, average: -10.32 
total_step_1:  2983141
total_step_2:  0
episode: 2447/2000000, thread: 0, score: -9.0, average: -10.22 
total_step_1:  0
total_step_2:  2991700
episode: 2448/2000000, thread: 1, score: -10.0, average: -10.10 
total_step_1:  2986927
total_step_2:  0
episode: 2449/2000000, thread: 0, score: -11.0, average: -10.10 
total_step_1:  0
total_step_2:  2995623
episode: 2450/2000000, thread: 1, score: -7.0, average: -10.04 SAVING
total_step_1:  2990336
total_step_2:  0
episode: 2451/2000000, thread: 0, score: -12.0, average: -10.02 SAVING
total_step_1:  0
total_step_2:  2999398
episode: 2452/2000000, thread: 1, score: -6.0, average: -9.86 SAVING
total_step_1:  0
total_step_2:  3002258
epi