In [1]:
import json
import random
import time
import socket
import os.path
from os import path
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
DISCOUNT_RATE = 0.99
LEARNING_RATE = 0.01
BATCH_SIZE = 500
REPLAY_MEMORY_CAPACITY = 20000
TARGET_UPDATE_STEP = 1000
MAX_STEPS_PER_EPISODE = 5000
TOTAL_EPISODES = 10000
POLICY_NETWORK_SAVE_FILE = "./DrMarioDQN_final/policy_network"

In [4]:
loss_function = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

In [5]:
class Communicator:
    """Provides a socket communication between the DQN and Dr.Mario"""
    DR_MARIO_HOST = '192.168.0.107'
    PORT = 42012
    BUFFER_SIZE = 1024
    __connection = None
    
    def __init__(self):
        self.__connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 

    def connect(self):
        self.__connection.connect((self.DR_MARIO_HOST, self.PORT))

    def read_state(self):
        state_str = self.__connection.recv(self.BUFFER_SIZE).decode()
        return state_str

    def send_action(self, action):
        self.__connection.sendall(str.encode(action))

    def __del__(self):
        self.__connection.close()

In [6]:
class State:
    board = None
    __position = None
    condition = None
    NUM_ROWS = 8
    NUM_COLS = 8
    STATE_SIZE = NUM_ROWS * NUM_COLS + 2
    TILE_TYPE_ENCODING = {
        "E": 0,
        "RV": 1, "YV": 2, "BV": 3,
        "RPL": 4, "RPR": 5, "RPU": 6, "RPD": 7, "RPN": 8,
        "YPL": 9, "YPR": 10, "YPU": 11, "YPD": 12, "YPN": 13,
        "BPL": 14, "BPR": 15, "BPU": 16, "BPD": 17, "BPN": 18,
    }

    def __init__(self, board, position, condition):
        self.board = board
        self.__position = position
        self.condition = condition
    
    def encodeState(self):
        encoded_state = self._encodeBoard()
        encoded_state.append(self.__position['row'])
        encoded_state.append(self.__position['col'])
        return encoded_state

    def _encodeBoard(self):
        encoded_board = []
        for row in self.board:
            for tile in row:
                encoded_board.append(self.TILE_TYPE_ENCODING[tile])
        
        return encoded_board

In [7]:
class Experience:
    state = None
    action = None
    reward = None
    next_state = None
    
    def __init__(self, state, action, reward, next_state):
        self.state = state
        self.action = action
        self.reward = reward
        self.next_state = next_state

In [8]:
class RewardManager:
    @staticmethod
    def getReward(state, next_state):
        if RewardManager.hasPlayerWon(next_state):
            return 100
        if RewardManager.hasPlayerLost(next_state):
            return -50

        state_viruses = RewardManager.__countViruses(state)
        next_state_viruses = RewardManager.__countViruses(next_state)

        reward = -1
        reward += RewardManager.getNumberNonEmptyTiles(state)
        reward -= RewardManager.getNumberNonEmptyTiles(next_state)
        reward += (state_viruses - next_state_viruses) * 10
        
        return reward

    @staticmethod
    def hasPlayerWon(state):
        return state.condition == "W"

    @staticmethod 
    def hasPlayerLost(state):
        return state.condition == "L"

    @staticmethod
    def getNumberNonEmptyTiles(state):
        number_non_empty_tiles = 0

        for row in state.board:
            for tile in row:
                if tile != "E":
                   number_non_empty_tiles += 1

        return number_non_empty_tiles

    @staticmethod
    def __countViruses(state):
        count = 0
        for row in state.board:
            for tile in row:
                if len(tile) == 2:
                    count += 1
        return count

In [9]:
class Environment:
    RESET_ACTION = "RS"
    QUIT_ACTION = "QT"
    PLAYER_ACTIONS = ["ML", "MR", "RL", "RR", "WT"]
    __communicator = None
    __current_state = None

    def __init__(self):
        self.__communicator = Communicator()
        self.__communicator.connect()
        self.reset()

    def reset(self):
        self.__communicator.send_action(self.RESET_ACTION)
        self.__current_state = self.__getState()
        return self.getCurrentState().encodeState()
        
    def getCurrentState(self):
        return self.__current_state

    def quit(self):
        self.__communicator.send_action(self.QUIT_ACTION)
    
    def makeAction(self, action):
        state = self.__current_state

        self.__communicator.send_action(self.PLAYER_ACTIONS[action])
        next_state = self.__getState()

        reward = RewardManager.getReward(self.__current_state, next_state)

        self.__current_state = next_state
        return Experience(state.encodeState(), action, reward, next_state.encodeState())

    def hasGameEnded(self):
        return self.__current_state.condition != "O"

    def __getState(self):
        state_str = self.__communicator.read_state()
        state_json = json.loads(state_str)
        return State(state_json["board"], state_json["position"], state_json["condition"])

In [10]:
class ReplayMemory:
    memory = []
    __capacity = 0
    __position_to_add = 0

    def __init__(self, capacity):
        self.__capacity = capacity

    def add(self, experience):
        if not(self.__isFull()):
            self.memory.append(experience)
        else:
            self.memory[self.__position_to_add] = experience
            self.__position_to_add = (self.__position_to_add + 1) % self.__capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def canProvideBatch(self, batch_size):
        return len(self.memory) >= batch_size
    
    def __isFull(self):
        return len(self.memory) == self.__capacity


In [11]:
def create_model():
    inputs = layers.Input(shape=(State.STATE_SIZE,))

    layer1 = layers.Dense(256, activation="relu")(inputs)
    layer2 = layers.Dense(128, activation="relu")(layer1)
    action = layers.Dense(len(Environment.PLAYER_ACTIONS), activation="linear")(layer2)

    return keras.Model(inputs=inputs, outputs=action)

In [12]:
class Agent:
    EPSILON_MIN = 0.01
    EPSILON_MAX = 1.0
    EPSILON_DECAY_RATE = 0.0025

    epsilon = None
    
    actions_count = 0
    model = None
    target_model = None

    def __init__(self, actions_count):
        self.actions_count = actions_count
        self.model = create_model()
        if path.exists(POLICY_NETWORK_SAVE_FILE + ".index"):
            self.model.load_weights(POLICY_NETWORK_SAVE_FILE)
        self.target_model = create_model()
        self.updateTargetModel()
        self.epsilon = self.EPSILON_MAX

    def updateTargetModel(self):
        self.target_model.set_weights(self.model.get_weights())
    
    def __explore(self):
        return random.randrange(self.actions_count)

    def __exploit(self, state):
        state_tensor = tf.convert_to_tensor(state)
        state_tensor = tf.expand_dims(state_tensor, 0)
        action_probs = self.model(state_tensor, training=False)
        action = tf.argmax(action_probs[0]).numpy()
        return action

    def chooseAction(self, state):
        if random.random() > self.epsilon:
            return self.__exploit(state)
        else:
            return self.__explore()

    def decayEpsilon(self, episode):
        self.epsilon = self.EPSILON_MIN + (self.EPSILON_MAX - self.EPSILON_MIN) * np.exp(-episode * self.EPSILON_DECAY_RATE)
        

In [13]:
def disassembleBatch(experiences):
    states = []
    actions = []
    rewards = []
    next_states = []

    for experience in experiences:
        states.append(experience.state)
        actions.append(experience.action)
        rewards.append(experience.reward)
        next_states.append(experience.next_state)

    return (np.array(states), np.array(actions), np.array(rewards), np.array(next_states))


In [14]:
episodes_rewards = []
memory = ReplayMemory(REPLAY_MEMORY_CAPACITY)

In [15]:
env = Environment()
agent = Agent(len(env.PLAYER_ACTIONS))
learning_steps = 0

In [None]:
number_steps_made = 0

for episode in range(0, TOTAL_EPISODES):
    agent.model.save_weights(POLICY_NETWORK_SAVE_FILE)
    state = env.reset()
    episode_reward = 0
    number_steps_made_in_episode = 0

    print(f'episode number: {episode}')
    print(f'episodes rewards: {episodes_rewards}')
    
    for step in range(0, MAX_STEPS_PER_EPISODE):
        number_steps_made += 1
        number_steps_made_in_episode += 1
        action = agent.chooseAction(state)
        experience = env.makeAction(action)
        memory.add(experience)
        state = experience.next_state
        episode_reward += experience.reward

        if memory.canProvideBatch(BATCH_SIZE):
            learning_steps += 1
            batch = memory.sample(BATCH_SIZE)
            states, actions, rewards, next_states = disassembleBatch(batch)

            opt_q_next_state = agent.target_model.predict(next_states)
            expected_opt_q = rewards + DISCOUNT_RATE * tf.reduce_max(
                opt_q_next_state, axis=1
            )
            masks = tf.one_hot(actions, agent.actions_count)

            with tf.GradientTape() as tape:
                q_values = agent.model(states)
                q_for_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
                loss = loss_function(expected_opt_q, q_for_action)

            # Backpropagation
            grads = tape.gradient(loss, agent.model.trainable_variables)
            optimizer.apply_gradients(zip(grads, agent.model.trainable_variables))

        if (learning_steps != 0) and (learning_steps % TARGET_UPDATE_STEP == 0):
            agent.updateTargetModel()

        if env.hasGameEnded():
            break
    
    print(f'number steps made: {number_steps_made}')
    print(f'number steps made in episode: {number_steps_made_in_episode}')
    print(f'epsilon: {agent.epsilon}')
    print()
    agent.decayEpsilon(episode)
    episodes_rewards.append(episode_reward)

print(episodes_rewards)
env.quit()

episode number: 0
episodes rewards: []
number steps made: 225
number steps made in episode: 225
epsilon: 1.0

episode number: 1
episodes rewards: [-296]
number steps made: 510
number steps made in episode: 285
epsilon: 1.0

episode number: 2
episodes rewards: [-296, -352]
number steps made: 1314
number steps made in episode: 804
epsilon: 0.9975280911734855

episode number: 3
episodes rewards: [-296, -352, -879]
number steps made: 1721
number steps made in episode: 407
epsilon: 0.9950623544007555

episode number: 4
episodes rewards: [-296, -352, -879, -484]
number steps made: 1874
number steps made in episode: 153
epsilon: 0.992602774270947

episode number: 5
episodes rewards: [-296, -352, -879, -484, -214]
number steps made: 2001
number steps made in episode: 127
epsilon: 0.9901493354116764

episode number: 6
episodes rewards: [-296, -352, -879, -484, -214, -186]
number steps made: 2493
number steps made in episode: 492
epsilon: 0.9877020224889427

episode number: 7
episodes rewards: [

number steps made: 14066
number steps made in episode: 353
epsilon: 0.9170566829343688

episode number: 37
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409]
number steps made: 14338
number steps made in episode: 272
epsilon: 0.9147918734185159

episode number: 38
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339]
number steps made: 14650
number steps made in episode: 312
epsilon: 0.9125327188548171

episode number: 39
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505

number steps made: 21294
number steps made in episode: 252
epsilon: 0.8706646530448178

episode number: 58
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321]
number steps made: 21634
number steps made in episode: 340
epsilon: 0.8685156787493323

episode number: 59
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407]
number steps made: 21704
number steps made in 

number steps made: 25589
number steps made in episode: 332
epsilon: 0.8369175092971592

episode number: 74
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399]
number steps made: 25722
number steps made in episode: 133
epsilon: 0.8348527974890472

episode number: 75
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590

number steps made: 30498
number steps made in episode: 405
epsilon: 0.8084760257755536

episode number: 88
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468]
number steps made: 30802
number steps made in episode: 304
epsilon: 0.8064823288706297

episode number: 89
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409

number steps made: 33623
number steps made in episode: 285
epsilon: 0.7848774928594494

episode number: 100
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358]
number steps made: 33839
number steps made in episode: 216
epsilon: 0.7829427186028165

episode number: 101
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -4

number steps made: 37605
number steps made in episode: 273
epsilon: 0.7638586948834531

episode number: 111
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344]
number steps made: 37881
number steps made in episode: 276
epsilon: 0.7619764019927188

episode number: 112
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -4

number steps made: 41177
number steps made in episode: 235
epsilon: 0.7452458573885745

episode number: 121
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304]
number steps made: 41535
number steps made in episode: 358
epsilon: 0.7434100384749007

episode number: 122
episodes rewards: [-296, -352, -879, -4

number steps made: 44030
number steps made in episode: 315
epsilon: 0.7270925724888465

episode number: 131
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374]
number steps made: 44357
number steps made in episode: 327
epsilon: 0.7253020801056514


number steps made: 46589
number steps made in episode: 410
epsilon: 0.711138149933122

episode number: 140
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474]
number steps made: 47260
number st

number steps made: 48927
number steps made in episode: 257
epsilon: 0.695538693540557

episode number: 149
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -188

number steps made: 51513
number steps made in episode: 283
epsilon: 0.6819641179089002

episode number: 157
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 53450
number steps made in episode: 137
epsilon: 0.6686583368840481

episode number: 165
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 56084
number steps made in episode: 364
epsilon: 0.6556160279761778

episode number: 173
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 57872
number steps made in episode: 105
epsilon: 0.6444160332719956

episode number: 180
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 59767
number steps made in episode: 190
epsilon: 0.6334103334364939

episode number: 187
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 61843
number steps made in episode: 436
epsilon: 0.6225955578880794

episode number: 194
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 63512
number steps made in episode: 398
epsilon: 0.6119683945172141

episode number: 201
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 66321
number steps made in episode: 244
epsilon: 0.6015255886720551

episode number: 208
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 67684
number steps made in episode: 226
epsilon: 0.5927189199815717

episode number: 214
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 69601
number steps made in episode: 381
epsilon: 0.5840433655064479

episode number: 220
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 70768
number steps made in episode: 131
epsilon: 0.5754969732103268

episode number: 226
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 71879
number steps made in episode: 310
epsilon: 0.5670778201188862

episode number: 232
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 73474
number steps made in episode: 302
epsilon: 0.558784011887162

episode number: 238
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -188

number steps made: 75018
number steps made in episode: 129
epsilon: 0.5506136823733123

episode number: 244
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 76711
number steps made in episode: 144
epsilon: 0.5425649932187278

episode number: 250
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 78555
number steps made in episode: 458
epsilon: 0.5346361334343929

episode number: 256
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 79855
number steps made in episode: 371
epsilon: 0.5281189987167522

episode number: 261
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 81012
number steps made in episode: 234
epsilon: 0.5216828211467822

episode number: 266
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 82398
number steps made in episode: 114
epsilon: 0.5153265950586433

episode number: 271
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 83131
number steps made in episode: 67
epsilon: 0.5090493272790773

episode number: 276
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -188

number steps made: 84217
number steps made in episode: 399
epsilon: 0.5028500369722223

episode number: 281
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 86100
number steps made in episode: 281
epsilon: 0.49672775548635545

episode number: 286
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 87212
number steps made in episode: 240
epsilon: 0.49068152620253863

episode number: 291
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 88651
number steps made in episode: 124
epsilon: 0.4847104043851452

episode number: 296
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 90542
number steps made in episode: 423
epsilon: 0.4788134570342426

episode number: 301
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 92396
number steps made in episode: 291
epsilon: 0.47298976273981014

episode number: 306
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 94259
number steps made in episode: 617
epsilon: 0.46723841153776574

episode number: 311
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 95296
number steps made in episode: 144
epsilon: 0.4615585047677828

episode number: 316
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 96543
number steps made in episode: 415
epsilon: 0.45594915493287286

episode number: 321
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 97293
number steps made in episode: 42
epsilon: 0.45151188670187276

episode number: 325
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 98447
number steps made in episode: 271
epsilon: 0.44711877002747064

episode number: 329
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 99442
number steps made in episode: 88
epsilon: 0.4427693655943381

episode number: 333
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -188

number steps made: 100468
number steps made in episode: 241
epsilon: 0.43846323845840735

episode number: 337
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 101608
number steps made in episode: 285
epsilon: 0.4341999580033764

episode number: 341
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 102843
number steps made in episode: 446
epsilon: 0.4299790978976469

episode number: 345
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 104124
number steps made in episode: 568
epsilon: 0.4258002360516908

episode number: 349
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 105232
number steps made in episode: 268
epsilon: 0.42166295457584135

episode number: 353
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 105961
number steps made in episode: 349
epsilon: 0.41756683973850306

episode number: 357
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 107042
number steps made in episode: 398
epsilon: 0.41351148192477877

episode number: 361
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 108139
number steps made in episode: 372
epsilon: 0.40949647559550767

episode number: 365
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 108981
number steps made in episode: 146
epsilon: 0.405521419246711

episode number: 369
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 109965
number steps made in episode: 60
epsilon: 0.40158591536944116

episode number: 373
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 111110
number steps made in episode: 439
epsilon: 0.397689570410031

episode number: 377
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 112042
number steps made in episode: 289
epsilon: 0.3938319947307376

episode number: 381
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 113336
number steps made in episode: 224
epsilon: 0.3900128025707783

episode number: 385
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 114108
number steps made in episode: 407
epsilon: 0.38623161200775447

episode number: 389
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 115684
number steps made in episode: 541
epsilon: 0.3824880449194588

episode number: 393
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 117052
number steps made in episode: 238
epsilon: 0.3787817269460628

episode number: 397
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 117964
number steps made in episode: 263
epsilon: 0.3751122874526806

episode number: 401
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 119033
number steps made in episode: 477
epsilon: 0.3714793594923049

episode number: 405
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 119902
number steps made in episode: 317
epsilon: 0.36788257976911215

episode number: 409
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 121350
number steps made in episode: 311
epsilon: 0.3643215886021329

episode number: 413
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 122607
number steps made in episode: 281
epsilon: 0.3607960298892828

episode number: 417
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 123362
number steps made in episode: 269
epsilon: 0.3573055510717526

episode number: 421
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 124186
number steps made in episode: 148
epsilon: 0.35384980309875186

episode number: 425
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 124990
number steps made in episode: 144
epsilon: 0.35042844039260346

episode number: 429
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 125983
number steps made in episode: 235
epsilon: 0.34704112081418564

episode number: 433
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 126818
number steps made in episode: 461
epsilon: 0.34368750562871775

episode number: 437
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 127438
number steps made in episode: 177
epsilon: 0.34119421087912155

episode number: 440
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 128191
number steps made in episode: 151
epsilon: 0.33871954589121406

episode number: 443
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 129075
number steps made in episode: 160
epsilon: 0.3362633714644372

episode number: 446
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 130084
number steps made in episode: 203
epsilon: 0.33382554943833187

episode number: 449
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 130893
number steps made in episode: 299
epsilon: 0.33140594268476625

episode number: 452
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 131917
number steps made in episode: 80
epsilon: 0.3290044151002225

episode number: 455
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 132504
number steps made in episode: 199
epsilon: 0.3266208315981408

episode number: 458
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 133749
number steps made in episode: 253
epsilon: 0.3242550581013207

episode number: 461
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 134533
number steps made in episode: 150
epsilon: 0.3219069615343792

episode number: 464
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 135582
number steps made in episode: 307
epsilon: 0.3195764098162652

episode number: 467
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 136266
number steps made in episode: 165
epsilon: 0.3172632718528302

episode number: 470
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 136761
number steps made in episode: 89
epsilon: 0.3149674175294537

episode number: 473
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 137353
number steps made in episode: 146
epsilon: 0.31268871770372464

episode number: 476
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 137970
number steps made in episode: 288
epsilon: 0.31042704419817707

episode number: 479
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 138717
number steps made in episode: 94
epsilon: 0.3081822697930801

episode number: 482
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 139445
number steps made in episode: 317
epsilon: 0.3059542682192813

episode number: 485
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 140102
number steps made in episode: 141
epsilon: 0.3037429141511048

episode number: 488
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 140912
number steps made in episode: 662
epsilon: 0.3015480831993013

episode number: 491
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 141813
number steps made in episode: 33
epsilon: 0.29936965190405085

episode number: 494
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 142750
number steps made in episode: 206
epsilon: 0.29720749772801874

episode number: 497
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 143534
number steps made in episode: 204
epsilon: 0.29506149904946255

episode number: 500
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 143985
number steps made in episode: 27
epsilon: 0.2929315351553908

episode number: 503
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 144448
number steps made in episode: 128
epsilon: 0.2908174862347727

episode number: 506
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 145044
number steps made in episode: 65
epsilon: 0.2887192333717991

episode number: 509
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -18

number steps made: 145568
number steps made in episode: 294
epsilon: 0.28663665853919323

episode number: 512
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 146045
number steps made in episode: 160
epsilon: 0.2845696445915717

episode number: 515
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 146603
number steps made in episode: 197
epsilon: 0.2825180752588548

episode number: 518
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 146951
number steps made in episode: 167
epsilon: 0.2804818351397267

episode number: 521
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 147362
number steps made in episode: 149
epsilon: 0.2784608096951439

episode number: 524
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 148249
number steps made in episode: 222
epsilon: 0.27645488524189205

episode number: 527
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 148986
number steps made in episode: 315
epsilon: 0.27446394894619186

episode number: 530
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 149930
number steps made in episode: 583
epsilon: 0.27248788881735175

episode number: 533
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 151034
number steps made in episode: 208
epsilon: 0.2705265937014684

episode number: 536
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 151441
number steps made in episode: 177
epsilon: 0.2685799532751744

episode number: 539
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 152077
number steps made in episode: 287
epsilon: 0.2666478580394326

episode number: 542
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 153279
number steps made in episode: 496
epsilon: 0.26473019931337644

episode number: 545
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 153953
number steps made in episode: 152
epsilon: 0.2628268692281969

episode number: 548
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1

number steps made: 155186
number steps made in episode: 181
epsilon: 0.26093776072107494

episode number: 551
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -

number steps made: 155786
number steps made in episode: 259
epsilon: 0.2590627675291589

episode number: 554
episodes rewards: [-296, -352, -879, -484, -214, -186, -576, -459, -625, -788, -309, -530, -739, -426, -266, -516, -489, -185, -243, -374, -643, -629, -226, -436, -431, -438, -389, -489, -505, -372, -321, -256, -689, -581, -378, -520, -409, -339, -381, -388, -330, -424, -408, -476, -285, -631, -533, -268, -198, -318, -590, -452, -259, -457, -656, -551, -383, -321, -407, -127, -431, -406, -401, -267, -218, -231, -373, -273, -211, -631, -204, -198, -594, -399, -194, -586, -344, -513, -580, -509, -254, -387, -455, -419, -381, -337, -471, -468, -371, -417, -271, -348, -427, -320, -352, -328, -243, -281, -227, -358, -289, -501, -356, -413, -376, -425, -560, -298, -542, -632, -344, -350, -293, -503, -587, -397, -503, -316, -332, -683, -304, -430, -261, -422, -351, -562, -225, -337, -203, -357, -374, -398, -518, -297, -348, -245, -495, -243, -137, -474, -716, -178, -180, -368, -131, -1