In [1]:
import tensorflow as tf

In [2]:
# coding:utf-8

import os
import gym
import random
import numpy as np
import tensorflow as tf
from collections import deque
from skimage.color import rgb2gray
from skimage.transform import resize




In [3]:
ENV_NAME = 'Breakout-v0'  # Environment name
FRAME_WIDTH = 84  # Resized frame width
FRAME_HEIGHT = 84  # Resized frame height
NUM_EPISODES = 12000  # Number of episodes the agent plays
STATE_LENGTH = 4  # Number of most recent frames to produce the input to the network
GAMMA = 0.99  # Discount factor
EXPLORATION_STEPS = 1000000  # Number of steps over which the initial value of epsilon is linearly annealed to its final value
INITIAL_EPSILON = 1.0  # Initial value of epsilon in epsilon-greedy
FINAL_EPSILON = 0.1  # Final value of epsilon in epsilon-greedy
INITIAL_REPLAY_SIZE = 20000  # Number of steps to populate the replay memory before training starts
NUM_REPLAY_MEMORY = 400000  # Number of replay memory the agent uses for training
BATCH_SIZE = 32  # Mini batch size
TARGET_UPDATE_INTERVAL = 10000  # The frequency with which the target network is updated
TRAIN_INTERVAL = 4  # The agent selects 4 actions between successive updates
LEARNING_RATE = 0.00025  # Learning rate used by RMSProp
MOMENTUM = 0.95  # Momentum used by RMSProp
MIN_GRAD = 0.01  # Constant added to the squared gradient in the denominator of the RMSProp update
SAVE_INTERVAL = 300000  # The frequency with which the network is saved
NO_OP_STEPS = 30  # Maximum number of "do nothing" actions to be performed by the agent at the start of an episode
LOAD_NETWORK = False
TRAIN = True
SAVE_NETWORK_PATH = 'saved_networks/' + ENV_NAME
SAVE_SUMMARY_PATH = 'graphs/dqn/kerastestnew'
NUM_EPISODES_AT_TEST = 30  # Number of episodes the agent plays at test time

In [4]:

def preprocess(observation, last_observation):
    processed_observation = np.maximum(observation, last_observation)
    processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT)) * 255)
    return processed_observation


In [5]:

env = gym.make(ENV_NAME)
num_actions = env.action_space.n
epsilon = INITIAL_EPSILON
epsilon_step = (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORATION_STEPS
t = 0

# Parameters used for summary
total_reward = 0
total_q_max = 0
total_loss = 0
duration = 0
episode = 0

# Create replay memory
replay_memory = deque()


In [6]:
def build_network():
    model = tf.contrib.keras.models.Sequential()
    #Conv2D(32, (8, 8), input_shape=(4, 84, 84..., strides=(4, 4), activation="relu")`
    model.add(tf.contrib.keras.layers.Convolution2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=(FRAME_WIDTH, FRAME_HEIGHT, STATE_LENGTH)))
    model.add(tf.contrib.keras.layers.Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(tf.contrib.keras.layers.Convolution2D(64, (3, 3), strides=(1, 1), activation='relu'))
    model.add(tf.contrib.keras.layers.Flatten())
    model.add(tf.contrib.keras.layers.Dense(512, activation='relu'))
    model.add(tf.contrib.keras.layers.Dense(num_actions))

    s = tf.placeholder(tf.float32, [None, FRAME_WIDTH, FRAME_HEIGHT, STATE_LENGTH])
    q_values = model(s)
    return s, q_values, model

# Create q network
s, q_values, q_network = build_network()
q_network_weights = q_network.trainable_weights

# Create target network
st, target_q_values, target_network = build_network()
target_network_weights = target_network.trainable_weights

# Define target network update operation
update_target_network = [target_network_weights[i].assign(q_network_weights[i]) for i in range(len(target_network_weights))]


a = tf.placeholder(tf.int64, [None])
y = tf.placeholder(tf.float32, [None])

# Convert action to one hot vector
a_one_hot = tf.one_hot(a, num_actions, 1.0, 0.0)
q_value = tf.reduce_sum(tf.multiply(q_values, a_one_hot), reduction_indices=1)

# Clip the error, the loss is quadratic when the error is in (-1, 1), and linear outside of that region
error = tf.abs(y - q_value)
quadratic_part = tf.clip_by_value(error, 0.0, 1.0)
linear_part = error - quadratic_part
loss = tf.reduce_mean(0.5 * tf.square(quadratic_part) + linear_part)

optimizer = tf.train.RMSPropOptimizer(LEARNING_RATE, momentum=MOMENTUM, epsilon=MIN_GRAD)
grads_update = optimizer.minimize(loss, var_list=q_network_weights)

def get_initial_state(observation, last_observation):
    processed_observation = np.maximum(observation, last_observation)
    processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT)) * 255)
    state = [processed_observation for _ in range(STATE_LENGTH)]
    return np.dstack(state)

def get_action(state, epsilon):
    if epsilon >= random.random() or t < INITIAL_REPLAY_SIZE:
        action = random.randrange(num_actions)
    else:
        action = np.argmax(q_values.eval(feed_dict={s: [np.float32(state / 255.0)]}))

    # Anneal epsilon linearly over time
    if epsilon > FINAL_EPSILON and t >= INITIAL_REPLAY_SIZE:
        epsilon -= epsilon_step

    return action

In [7]:


sess = tf.InteractiveSession()
saver = tf.train.Saver(q_network_weights)

### SUMMARY STUFF
episode_total_reward = tf.Variable(0.)
tf.summary.scalar(ENV_NAME + '/Total Reward/Episode', episode_total_reward)
episode_avg_max_q = tf.Variable(0.)
tf.summary.scalar(ENV_NAME + '/Average Max Q/Episode', episode_avg_max_q)
episode_duration = tf.Variable(0.)
tf.summary.scalar(ENV_NAME + '/Duration/Episode', episode_duration)
episode_avg_loss = tf.Variable(0.)
tf.summary.scalar(ENV_NAME + '/Average Loss/Episode', episode_avg_loss)
summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]
summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]
update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]
summary_op = tf.summary.merge_all()

summary_writer = tf.summary.FileWriter(SAVE_SUMMARY_PATH, sess.graph)

if not os.path.exists(SAVE_NETWORK_PATH):
    os.makedirs(SAVE_NETWORK_PATH)

sess.run(tf.global_variables_initializer())

# Initialize target network
sess.run(update_target_network)

    


INFO:tensorflow:Summary name Breakout-v0/Total Reward/Episode is illegal; using Breakout-v0/Total_Reward/Episode instead.
INFO:tensorflow:Summary name Breakout-v0/Average Max Q/Episode is illegal; using Breakout-v0/Average_Max_Q/Episode instead.
INFO:tensorflow:Summary name Breakout-v0/Average Loss/Episode is illegal; using Breakout-v0/Average_Loss/Episode instead.


[array([[[[  3.88692319e-03,  -1.21070072e-03,   2.92069465e-02, ...,
            -2.75976639e-02,   1.43117681e-02,  -4.52957191e-02],
          [  6.68125972e-03,  -1.21617578e-02,  -2.12611854e-02, ...,
            -3.42346355e-02,   1.35318562e-02,  -3.72621380e-02],
          [ -4.09044698e-02,   3.88623551e-02,   3.11767310e-02, ...,
            -1.88507438e-02,   3.28348055e-02,  -4.85737808e-02],
          [  3.55755463e-02,  -8.47697631e-03,   1.27194822e-03, ...,
            -4.70306240e-02,   4.81653586e-03,   2.47320533e-02]],
 
         [[  3.59374583e-02,  -3.60784233e-02,   3.97876501e-02, ...,
            -4.66586389e-02,  -1.98579822e-02,  -8.08069110e-03],
          [ -3.61337326e-02,   4.66899797e-02,   1.56938881e-02, ...,
             7.80966505e-03,  -2.79458035e-02,   1.18259713e-03],
          [ -3.48909497e-02,  -3.93933505e-02,  -7.97758996e-03, ...,
            -1.79040618e-02,   3.72865573e-02,   2.15436742e-02],
          [  1.94916129e-03,  -1.71689838e-02

In [8]:

    


def run(self, state, action, reward, terminal, observation):
    next_state = np.dstack((state[:, :, 1:], observation))

    # Clip all positive rewards at 1 and all negative rewards at -1, leaving 0 rewards unchanged
    reward = np.clip(reward, -1, 1)

    # Store transition in replay memory
    self.replay_memory.append((state, action, reward, next_state, terminal))
    if len(self.replay_memory) > NUM_REPLAY_MEMORY:
        self.replay_memory.popleft()

    if self.t >= INITIAL_REPLAY_SIZE:
        # Train network
        if self.t % TRAIN_INTERVAL == 0:
            self.train_network()

        # Update target network
        if self.t % TARGET_UPDATE_INTERVAL == 0:
            self.sess.run(self.update_target_network)

        # Save network
        if self.t % SAVE_INTERVAL == 0:
            save_path = self.saver.save(self.sess, SAVE_NETWORK_PATH + '/' + ENV_NAME, global_step=self.t)
            print('Successfully saved: ' + save_path)

    self.total_reward += reward
    self.total_q_max += np.max(self.q_values.eval(feed_dict={self.s: [np.float32(state / 255.0)]}))
    self.duration += 1

    if terminal:
        # Write summary
        if self.t >= INITIAL_REPLAY_SIZE:
            stats = [self.total_reward, self.total_q_max / float(self.duration),
                    self.duration, self.total_loss / (float(self.duration) / float(TRAIN_INTERVAL))]
            for i in range(len(stats)):
                self.sess.run(self.update_ops[i], feed_dict={
                    self.summary_placeholders[i]: float(stats[i])
                })
            summary_str = self.sess.run(self.summary_op)
            self.summary_writer.add_summary(summary_str, self.episode + 1)

        # Debug
        if self.t < INITIAL_REPLAY_SIZE:
            mode = 'random'
        elif INITIAL_REPLAY_SIZE <= self.t < INITIAL_REPLAY_SIZE + EXPLORATION_STEPS:
            mode = 'explore'
        else:
            mode = 'exploit'
        print('EPISODE: {0:6d} / TIMESTEP: {1:8d} / DURATION: {2:5d} / EPSILON: {3:.5f} / TOTAL_REWARD: {4:3.0f} / AVG_MAX_Q: {5:2.4f} / AVG_LOSS: {6:.5f} / MODE: {7}'.format(
            self.episode + 1, self.t, self.duration, self.epsilon,
            self.total_reward, self.total_q_max / float(self.duration),
            self.total_loss / (float(self.duration) / float(TRAIN_INTERVAL)), mode))

        self.total_reward = 0
        self.total_q_max = 0
        self.total_loss = 0
        self.duration = 0
        self.episode += 1

    self.t += 1

    return next_state

def train_network(replay_memory):
    state_batch = []
    action_batch = []
    reward_batch = []
    next_state_batch = []
    terminal_batch = []
    y_batch = []

    # Sample random minibatch of transition from replay memory
    minibatch = random.sample(replay_memory, BATCH_SIZE)
    for data in minibatch:
        state_batch.append(data[0])
        action_batch.append(data[1])
        reward_batch.append(data[2])
        next_state_batch.append(data[3])
        terminal_batch.append(data[4])

    # Convert True to 1, False to 0
    terminal_batch = np.array(terminal_batch) + 0

    target_q_values_batch = target_q_values.eval(feed_dict={st: np.float32(np.array(next_state_batch) / 255.0)})
    y_batch = reward_batch + (1 - terminal_batch) * GAMMA * np.max(target_q_values_batch, axis=1)

    local_loss, _ = sess.run([loss, grads_update], feed_dict={
        s: np.float32(np.array(state_batch) / 255.0),
        a: action_batch,
        y: y_batch
    })
    global total_loss
    
    total_loss += local_loss

In [9]:



for _ in range(NUM_EPISODES):
    terminal = False
    observation = env.reset()
    for _ in range(random.randint(1, NO_OP_STEPS)):
        last_observation = observation
        observation, _, _, _ = env.step(0)  # Do nothing
    state = get_initial_state(observation, last_observation)
    while not terminal:
        last_observation = observation
        action = get_action(state, epsilon)
        observation, reward, terminal, _ = env.step(action)
        # env.render()
        processed_observation = preprocess(observation, last_observation)

#         state = agent.run(state, action, reward, terminal, processed_observation)
        next_state = np.dstack((state[:, :, 1:], processed_observation))

        # Clip all positive rewards at 1 and all negative rewards at -1, leaving 0 rewards unchanged
        reward = np.clip(reward, -1, 1)

        # Store transition in replay memory
        replay_memory.append((state, action, reward, next_state, terminal))
        if len(replay_memory) > NUM_REPLAY_MEMORY:
            replay_memory.popleft()

        if t >= INITIAL_REPLAY_SIZE:
            # Train network
            if t % TRAIN_INTERVAL == 0:
                train_network(replay_memory)

            # Update target network
            if t % TARGET_UPDATE_INTERVAL == 0:
                sess.run(update_target_network)

        total_reward += reward
        total_q_max += np.max(q_values.eval(feed_dict={s: [np.float32(state / 255.0)]}))
        duration += 1

        if terminal:
            # Write summary
            if t >= INITIAL_REPLAY_SIZE:
                stats = [total_reward, total_q_max / float(duration),
                        duration, total_loss / (float(duration) / float(TRAIN_INTERVAL))]
                for i in range(len(stats)):
                    sess.run(update_ops[i], feed_dict={
                        summary_placeholders[i]: float(stats[i])
                    })
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, episode + 1)

            # Debug
            if t < INITIAL_REPLAY_SIZE:
                mode = 'random'
            elif INITIAL_REPLAY_SIZE <= t < INITIAL_REPLAY_SIZE + EXPLORATION_STEPS:
                mode = 'explore'
            else:
                mode = 'exploit'
            print('EPISODE: {0:6d} / TIMESTEP: {1:8d} / DURATION: {2:5d} / EPSILON: {3:.5f} / TOTAL_REWARD: {4:3.0f} / AVG_MAX_Q: {5:2.4f} / AVG_LOSS: {6:.5f} / MODE: {7}'.format(
                episode + 1, t, duration, epsilon,
                total_reward, total_q_max / float(duration),
                total_loss / (float(duration) / float(TRAIN_INTERVAL)), mode))

            total_reward = 0
            total_q_max = 0
            total_loss = 0
            duration = 0
            episode += 1

        t += 1

        state = next_state

  warn("The default mode, 'constant', will be changed to 'reflect' in "


EPISODE:      1 / TIMESTEP:      214 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0619 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:      2 / TIMESTEP:      388 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0624 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:      3 / TIMESTEP:      635 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0600 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:      4 / TIMESTEP:      807 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0623 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:      5 / TIMESTEP:     1024 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0617 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:      6 / TIMESTEP:     1271 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0602 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:      7 / TIMESTEP:     1562 / DURATION:   291 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q:

EPISODE:     56 / TIMESTEP:    13356 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0603 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:     57 / TIMESTEP:    13593 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0601 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:     58 / TIMESTEP:    13921 / DURATION:   328 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.0615 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:     59 / TIMESTEP:    14130 / DURATION:   209 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0603 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:     60 / TIMESTEP:    14402 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.0622 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:     61 / TIMESTEP:    14610 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0626 / AVG_LOSS: 0.00000 / MODE: random
EPISODE:     62 / TIMESTEP:    14784 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q:

EPISODE:    111 / TIMESTEP:    27114 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0705 / AVG_LOSS: 0.00138 / MODE: explore
EPISODE:    112 / TIMESTEP:    27291 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0701 / AVG_LOSS: 0.00248 / MODE: explore
EPISODE:    113 / TIMESTEP:    27536 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0703 / AVG_LOSS: 0.00357 / MODE: explore
EPISODE:    114 / TIMESTEP:    27706 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0773 / AVG_LOSS: 0.00401 / MODE: explore
EPISODE:    115 / TIMESTEP:    27873 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0712 / AVG_LOSS: 0.00337 / MODE: explore
EPISODE:    116 / TIMESTEP:    28102 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0695 / AVG_LOSS: 0.00246 / MODE: explore
EPISODE:    117 / TIMESTEP:    28293 / DURATION:   191 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:    166 / TIMESTEP:    40329 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0880 / AVG_LOSS: 0.00149 / MODE: explore
EPISODE:    167 / TIMESTEP:    40513 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0878 / AVG_LOSS: 0.00308 / MODE: explore
EPISODE:    168 / TIMESTEP:    40775 / DURATION:   262 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.0885 / AVG_LOSS: 0.00309 / MODE: explore
EPISODE:    169 / TIMESTEP:    40951 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0841 / AVG_LOSS: 0.00178 / MODE: explore
EPISODE:    170 / TIMESTEP:    41184 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0813 / AVG_LOSS: 0.00216 / MODE: explore
EPISODE:    171 / TIMESTEP:    41371 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0903 / AVG_LOSS: 0.00268 / MODE: explore
EPISODE:    172 / TIMESTEP:    41550 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    221 / TIMESTEP:    54109 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0886 / AVG_LOSS: 0.00224 / MODE: explore
EPISODE:    222 / TIMESTEP:    54360 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.0924 / AVG_LOSS: 0.00151 / MODE: explore
EPISODE:    223 / TIMESTEP:    54577 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0885 / AVG_LOSS: 0.00232 / MODE: explore
EPISODE:    224 / TIMESTEP:    54772 / DURATION:   195 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.0945 / AVG_LOSS: 0.00226 / MODE: explore
EPISODE:    225 / TIMESTEP:    54989 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0918 / AVG_LOSS: 0.00289 / MODE: explore
EPISODE:    226 / TIMESTEP:    55226 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0913 / AVG_LOSS: 0.00187 / MODE: explore
EPISODE:    227 / TIMESTEP:    55409 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    276 / TIMESTEP:    66962 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1102 / AVG_LOSS: 0.00249 / MODE: explore
EPISODE:    277 / TIMESTEP:    67232 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1044 / AVG_LOSS: 0.00232 / MODE: explore
EPISODE:    278 / TIMESTEP:    67452 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0993 / AVG_LOSS: 0.00230 / MODE: explore
EPISODE:    279 / TIMESTEP:    67684 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.0989 / AVG_LOSS: 0.00192 / MODE: explore
EPISODE:    280 / TIMESTEP:    67958 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1055 / AVG_LOSS: 0.00344 / MODE: explore
EPISODE:    281 / TIMESTEP:    68258 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1019 / AVG_LOSS: 0.00294 / MODE: explore
EPISODE:    282 / TIMESTEP:    68445 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    331 / TIMESTEP:    81286 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1174 / AVG_LOSS: 0.00231 / MODE: explore
EPISODE:    332 / TIMESTEP:    81630 / DURATION:   344 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.1206 / AVG_LOSS: 0.00348 / MODE: explore
EPISODE:    333 / TIMESTEP:    81818 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1227 / AVG_LOSS: 0.00105 / MODE: explore
EPISODE:    334 / TIMESTEP:    82051 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1157 / AVG_LOSS: 0.00323 / MODE: explore
EPISODE:    335 / TIMESTEP:    82267 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1152 / AVG_LOSS: 0.00204 / MODE: explore
EPISODE:    336 / TIMESTEP:    82548 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1209 / AVG_LOSS: 0.00269 / MODE: explore
EPISODE:    337 / TIMESTEP:    82817 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:    386 / TIMESTEP:    94143 / DURATION:   202 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1270 / AVG_LOSS: 0.00094 / MODE: explore
EPISODE:    387 / TIMESTEP:    94428 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1293 / AVG_LOSS: 0.00178 / MODE: explore
EPISODE:    388 / TIMESTEP:    94603 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1228 / AVG_LOSS: 0.00077 / MODE: explore
EPISODE:    389 / TIMESTEP:    94827 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1287 / AVG_LOSS: 0.00282 / MODE: explore
EPISODE:    390 / TIMESTEP:    95059 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1347 / AVG_LOSS: 0.00247 / MODE: explore
EPISODE:    391 / TIMESTEP:    95304 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1293 / AVG_LOSS: 0.00236 / MODE: explore
EPISODE:    392 / TIMESTEP:    95544 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:    441 / TIMESTEP:   107346 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1368 / AVG_LOSS: 0.00293 / MODE: explore
EPISODE:    442 / TIMESTEP:   107649 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1418 / AVG_LOSS: 0.00334 / MODE: explore
EPISODE:    443 / TIMESTEP:   107894 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1355 / AVG_LOSS: 0.00257 / MODE: explore
EPISODE:    444 / TIMESTEP:   108097 / DURATION:   203 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1383 / AVG_LOSS: 0.00373 / MODE: explore
EPISODE:    445 / TIMESTEP:   108422 / DURATION:   325 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.1373 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:    446 / TIMESTEP:   108598 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1357 / AVG_LOSS: 0.00216 / MODE: explore
EPISODE:    447 / TIMESTEP:   108767 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    496 / TIMESTEP:   121757 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1557 / AVG_LOSS: 0.00179 / MODE: explore
EPISODE:    497 / TIMESTEP:   122161 / DURATION:   404 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.1581 / AVG_LOSS: 0.00192 / MODE: explore
EPISODE:    498 / TIMESTEP:   122384 / DURATION:   223 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1586 / AVG_LOSS: 0.00173 / MODE: explore
EPISODE:    499 / TIMESTEP:   122764 / DURATION:   380 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.1619 / AVG_LOSS: 0.00398 / MODE: explore
EPISODE:    500 / TIMESTEP:   122946 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1571 / AVG_LOSS: 0.00176 / MODE: explore
EPISODE:    501 / TIMESTEP:   123424 / DURATION:   478 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.1597 / AVG_LOSS: 0.00189 / MODE: explore
EPISODE:    502 / TIMESTEP:   123620 / DURATION:   196 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    551 / TIMESTEP:   134967 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1697 / AVG_LOSS: 0.00257 / MODE: explore
EPISODE:    552 / TIMESTEP:   135274 / DURATION:   307 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1672 / AVG_LOSS: 0.00088 / MODE: explore
EPISODE:    553 / TIMESTEP:   135513 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1711 / AVG_LOSS: 0.00343 / MODE: explore
EPISODE:    554 / TIMESTEP:   135687 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1684 / AVG_LOSS: 0.00183 / MODE: explore
EPISODE:    555 / TIMESTEP:   135873 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1745 / AVG_LOSS: 0.00508 / MODE: explore
EPISODE:    556 / TIMESTEP:   136062 / DURATION:   189 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1706 / AVG_LOSS: 0.00269 / MODE: explore
EPISODE:    557 / TIMESTEP:   136308 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:    606 / TIMESTEP:   148352 / DURATION:   313 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.1774 / AVG_LOSS: 0.00269 / MODE: explore
EPISODE:    607 / TIMESTEP:   148729 / DURATION:   377 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.1720 / AVG_LOSS: 0.00090 / MODE: explore
EPISODE:    608 / TIMESTEP:   148881 / DURATION:   152 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1777 / AVG_LOSS: 0.00296 / MODE: explore
EPISODE:    609 / TIMESTEP:   149224 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.1772 / AVG_LOSS: 0.00223 / MODE: explore
EPISODE:    610 / TIMESTEP:   149406 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.1700 / AVG_LOSS: 0.00146 / MODE: explore
EPISODE:    611 / TIMESTEP:   149681 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1794 / AVG_LOSS: 0.00234 / MODE: explore
EPISODE:    612 / TIMESTEP:   149975 / DURATION:   294 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:    661 / TIMESTEP:   162075 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2003 / AVG_LOSS: 0.00308 / MODE: explore
EPISODE:    662 / TIMESTEP:   162285 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1912 / AVG_LOSS: 0.00039 / MODE: explore
EPISODE:    663 / TIMESTEP:   162570 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.1961 / AVG_LOSS: 0.00267 / MODE: explore
EPISODE:    664 / TIMESTEP:   162774 / DURATION:   204 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2031 / AVG_LOSS: 0.00431 / MODE: explore
EPISODE:    665 / TIMESTEP:   163019 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1955 / AVG_LOSS: 0.00111 / MODE: explore
EPISODE:    666 / TIMESTEP:   163267 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.1992 / AVG_LOSS: 0.00281 / MODE: explore
EPISODE:    667 / TIMESTEP:   163487 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:    716 / TIMESTEP:   175546 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2086 / AVG_LOSS: 0.00224 / MODE: explore
EPISODE:    717 / TIMESTEP:   175824 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2091 / AVG_LOSS: 0.00230 / MODE: explore
EPISODE:    718 / TIMESTEP:   176033 / DURATION:   209 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2055 / AVG_LOSS: 0.00130 / MODE: explore
EPISODE:    719 / TIMESTEP:   176331 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.2089 / AVG_LOSS: 0.00243 / MODE: explore
EPISODE:    720 / TIMESTEP:   176643 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2117 / AVG_LOSS: 0.00284 / MODE: explore
EPISODE:    721 / TIMESTEP:   176812 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2105 / AVG_LOSS: 0.00375 / MODE: explore
EPISODE:    722 / TIMESTEP:   176988 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    771 / TIMESTEP:   189167 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2210 / AVG_LOSS: 0.00172 / MODE: explore
EPISODE:    772 / TIMESTEP:   189448 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2203 / AVG_LOSS: 0.00318 / MODE: explore
EPISODE:    773 / TIMESTEP:   189747 / DURATION:   299 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2217 / AVG_LOSS: 0.00342 / MODE: explore
EPISODE:    774 / TIMESTEP:   189925 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2245 / AVG_LOSS: 0.00249 / MODE: explore
EPISODE:    775 / TIMESTEP:   190363 / DURATION:   438 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2227 / AVG_LOSS: 0.00311 / MODE: explore
EPISODE:    776 / TIMESTEP:   190721 / DURATION:   358 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.2167 / AVG_LOSS: 0.00203 / MODE: explore
EPISODE:    777 / TIMESTEP:   191014 / DURATION:   293 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:    826 / TIMESTEP:   202966 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2225 / AVG_LOSS: 0.00141 / MODE: explore
EPISODE:    827 / TIMESTEP:   203140 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2207 / AVG_LOSS: 0.00184 / MODE: explore
EPISODE:    828 / TIMESTEP:   203435 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2239 / AVG_LOSS: 0.00365 / MODE: explore
EPISODE:    829 / TIMESTEP:   203623 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2266 / AVG_LOSS: 0.00313 / MODE: explore
EPISODE:    830 / TIMESTEP:   203898 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2251 / AVG_LOSS: 0.00168 / MODE: explore
EPISODE:    831 / TIMESTEP:   204309 / DURATION:   411 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2248 / AVG_LOSS: 0.00403 / MODE: explore
EPISODE:    832 / TIMESTEP:   204491 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:    881 / TIMESTEP:   217271 / DURATION:   377 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2326 / AVG_LOSS: 0.00324 / MODE: explore
EPISODE:    882 / TIMESTEP:   217611 / DURATION:   340 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.2314 / AVG_LOSS: 0.00228 / MODE: explore
EPISODE:    883 / TIMESTEP:   217895 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2360 / AVG_LOSS: 0.00359 / MODE: explore
EPISODE:    884 / TIMESTEP:   218064 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2331 / AVG_LOSS: 0.00376 / MODE: explore
EPISODE:    885 / TIMESTEP:   218240 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2259 / AVG_LOSS: 0.00154 / MODE: explore
EPISODE:    886 / TIMESTEP:   218551 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2289 / AVG_LOSS: 0.00266 / MODE: explore
EPISODE:    887 / TIMESTEP:   218969 / DURATION:   418 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

EPISODE:    936 / TIMESTEP:   231192 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2419 / AVG_LOSS: 0.00352 / MODE: explore
EPISODE:    937 / TIMESTEP:   231692 / DURATION:   500 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_MAX_Q: 0.2394 / AVG_LOSS: 0.00177 / MODE: explore
EPISODE:    938 / TIMESTEP:   231892 / DURATION:   200 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2345 / AVG_LOSS: 0.00228 / MODE: explore
EPISODE:    939 / TIMESTEP:   232245 / DURATION:   353 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2353 / AVG_LOSS: 0.00171 / MODE: explore
EPISODE:    940 / TIMESTEP:   232661 / DURATION:   416 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2394 / AVG_LOSS: 0.00284 / MODE: explore
EPISODE:    941 / TIMESTEP:   233058 / DURATION:   397 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2391 / AVG_LOSS: 0.00295 / MODE: explore
EPISODE:    942 / TIMESTEP:   233320 / DURATION:   262 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:    991 / TIMESTEP:   245389 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2436 / AVG_LOSS: 0.00258 / MODE: explore
EPISODE:    992 / TIMESTEP:   245570 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2517 / AVG_LOSS: 0.00468 / MODE: explore
EPISODE:    993 / TIMESTEP:   245745 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2428 / AVG_LOSS: 0.00258 / MODE: explore
EPISODE:    994 / TIMESTEP:   245980 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2461 / AVG_LOSS: 0.00412 / MODE: explore
EPISODE:    995 / TIMESTEP:   246193 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2453 / AVG_LOSS: 0.00305 / MODE: explore
EPISODE:    996 / TIMESTEP:   246366 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2416 / AVG_LOSS: 0.00154 / MODE: explore
EPISODE:    997 / TIMESTEP:   246618 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   1046 / TIMESTEP:   259161 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2535 / AVG_LOSS: 0.00258 / MODE: explore
EPISODE:   1047 / TIMESTEP:   259430 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2559 / AVG_LOSS: 0.00285 / MODE: explore
EPISODE:   1048 / TIMESTEP:   259712 / DURATION:   282 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2546 / AVG_LOSS: 0.00230 / MODE: explore
EPISODE:   1049 / TIMESTEP:   259936 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2542 / AVG_LOSS: 0.00211 / MODE: explore
EPISODE:   1050 / TIMESTEP:   260108 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2578 / AVG_LOSS: 0.00476 / MODE: explore
EPISODE:   1051 / TIMESTEP:   260377 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2674 / AVG_LOSS: 0.00330 / MODE: explore
EPISODE:   1052 / TIMESTEP:   260556 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   1101 / TIMESTEP:   272702 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2592 / AVG_LOSS: 0.00283 / MODE: explore
EPISODE:   1102 / TIMESTEP:   272918 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2614 / AVG_LOSS: 0.00360 / MODE: explore
EPISODE:   1103 / TIMESTEP:   273216 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2672 / AVG_LOSS: 0.00306 / MODE: explore
EPISODE:   1104 / TIMESTEP:   273519 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2648 / AVG_LOSS: 0.00221 / MODE: explore
EPISODE:   1105 / TIMESTEP:   273759 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2607 / AVG_LOSS: 0.00227 / MODE: explore
EPISODE:   1106 / TIMESTEP:   273929 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2630 / AVG_LOSS: 0.00411 / MODE: explore
EPISODE:   1107 / TIMESTEP:   274172 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   1156 / TIMESTEP:   286392 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2659 / AVG_LOSS: 0.00357 / MODE: explore
EPISODE:   1157 / TIMESTEP:   286722 / DURATION:   330 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.2676 / AVG_LOSS: 0.00368 / MODE: explore
EPISODE:   1158 / TIMESTEP:   286933 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2624 / AVG_LOSS: 0.00188 / MODE: explore
EPISODE:   1159 / TIMESTEP:   287097 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2632 / AVG_LOSS: 0.00160 / MODE: explore
EPISODE:   1160 / TIMESTEP:   287275 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2627 / AVG_LOSS: 0.00361 / MODE: explore
EPISODE:   1161 / TIMESTEP:   287510 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2631 / AVG_LOSS: 0.00224 / MODE: explore
EPISODE:   1162 / TIMESTEP:   287675 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1211 / TIMESTEP:   299796 / DURATION:   432 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2703 / AVG_LOSS: 0.00347 / MODE: explore
EPISODE:   1212 / TIMESTEP:   300084 / DURATION:   288 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2628 / AVG_LOSS: 0.00144 / MODE: explore
EPISODE:   1213 / TIMESTEP:   300296 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2772 / AVG_LOSS: 0.00480 / MODE: explore
EPISODE:   1214 / TIMESTEP:   300531 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2729 / AVG_LOSS: 0.00407 / MODE: explore
EPISODE:   1215 / TIMESTEP:   300697 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2659 / AVG_LOSS: 0.00203 / MODE: explore
EPISODE:   1216 / TIMESTEP:   301054 / DURATION:   357 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.2627 / AVG_LOSS: 0.00193 / MODE: explore
EPISODE:   1217 / TIMESTEP:   301221 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1266 / TIMESTEP:   313219 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2750 / AVG_LOSS: 0.00150 / MODE: explore
EPISODE:   1267 / TIMESTEP:   313499 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2759 / AVG_LOSS: 0.00233 / MODE: explore
EPISODE:   1268 / TIMESTEP:   313751 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2701 / AVG_LOSS: 0.00210 / MODE: explore
EPISODE:   1269 / TIMESTEP:   313994 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.2769 / AVG_LOSS: 0.00275 / MODE: explore
EPISODE:   1270 / TIMESTEP:   314170 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2738 / AVG_LOSS: 0.00269 / MODE: explore
EPISODE:   1271 / TIMESTEP:   314417 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.2723 / AVG_LOSS: 0.00246 / MODE: explore
EPISODE:   1272 / TIMESTEP:   314691 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   1321 / TIMESTEP:   325769 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2908 / AVG_LOSS: 0.00158 / MODE: explore
EPISODE:   1322 / TIMESTEP:   326149 / DURATION:   380 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.2925 / AVG_LOSS: 0.00304 / MODE: explore
EPISODE:   1323 / TIMESTEP:   326322 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2986 / AVG_LOSS: 0.00341 / MODE: explore
EPISODE:   1324 / TIMESTEP:   326510 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2924 / AVG_LOSS: 0.00219 / MODE: explore
EPISODE:   1325 / TIMESTEP:   326686 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2908 / AVG_LOSS: 0.00261 / MODE: explore
EPISODE:   1326 / TIMESTEP:   326858 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2927 / AVG_LOSS: 0.00344 / MODE: explore
EPISODE:   1327 / TIMESTEP:   327044 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1376 / TIMESTEP:   339232 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3019 / AVG_LOSS: 0.00239 / MODE: explore
EPISODE:   1377 / TIMESTEP:   339485 / DURATION:   253 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3102 / AVG_LOSS: 0.00456 / MODE: explore
EPISODE:   1378 / TIMESTEP:   339666 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.2993 / AVG_LOSS: 0.00232 / MODE: explore
EPISODE:   1379 / TIMESTEP:   339873 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3040 / AVG_LOSS: 0.00248 / MODE: explore
EPISODE:   1380 / TIMESTEP:   340144 / DURATION:   271 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3136 / AVG_LOSS: 0.00268 / MODE: explore
EPISODE:   1381 / TIMESTEP:   340428 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3163 / AVG_LOSS: 0.00332 / MODE: explore
EPISODE:   1382 / TIMESTEP:   340609 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1431 / TIMESTEP:   353426 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3296 / AVG_LOSS: 0.00472 / MODE: explore
EPISODE:   1432 / TIMESTEP:   353781 / DURATION:   355 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.3294 / AVG_LOSS: 0.00318 / MODE: explore
EPISODE:   1433 / TIMESTEP:   354026 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3258 / AVG_LOSS: 0.00130 / MODE: explore
EPISODE:   1434 / TIMESTEP:   354197 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3243 / AVG_LOSS: 0.00447 / MODE: explore
EPISODE:   1435 / TIMESTEP:   354376 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3200 / AVG_LOSS: 0.00128 / MODE: explore
EPISODE:   1436 / TIMESTEP:   354687 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3291 / AVG_LOSS: 0.00341 / MODE: explore
EPISODE:   1437 / TIMESTEP:   354866 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1486 / TIMESTEP:   366609 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3251 / AVG_LOSS: 0.00124 / MODE: explore
EPISODE:   1487 / TIMESTEP:   366781 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3219 / AVG_LOSS: 0.00359 / MODE: explore
EPISODE:   1488 / TIMESTEP:   367166 / DURATION:   385 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.3192 / AVG_LOSS: 0.00221 / MODE: explore
EPISODE:   1489 / TIMESTEP:   367340 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3206 / AVG_LOSS: 0.00115 / MODE: explore
EPISODE:   1490 / TIMESTEP:   367592 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3224 / AVG_LOSS: 0.00216 / MODE: explore
EPISODE:   1491 / TIMESTEP:   367841 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3259 / AVG_LOSS: 0.00316 / MODE: explore
EPISODE:   1492 / TIMESTEP:   368071 / DURATION:   230 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   1541 / TIMESTEP:   380218 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3509 / AVG_LOSS: 0.00479 / MODE: explore
EPISODE:   1542 / TIMESTEP:   380518 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3410 / AVG_LOSS: 0.00284 / MODE: explore
EPISODE:   1543 / TIMESTEP:   380715 / DURATION:   197 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3433 / AVG_LOSS: 0.00267 / MODE: explore
EPISODE:   1544 / TIMESTEP:   380911 / DURATION:   196 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3494 / AVG_LOSS: 0.00575 / MODE: explore
EPISODE:   1545 / TIMESTEP:   381088 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3478 / AVG_LOSS: 0.00122 / MODE: explore
EPISODE:   1546 / TIMESTEP:   381286 / DURATION:   198 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3513 / AVG_LOSS: 0.00445 / MODE: explore
EPISODE:   1547 / TIMESTEP:   381452 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1596 / TIMESTEP:   392875 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.3521 / AVG_LOSS: 0.00283 / MODE: explore
EPISODE:   1597 / TIMESTEP:   393037 / DURATION:   162 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3510 / AVG_LOSS: 0.00091 / MODE: explore
EPISODE:   1598 / TIMESTEP:   393318 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3599 / AVG_LOSS: 0.00220 / MODE: explore
EPISODE:   1599 / TIMESTEP:   393525 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3539 / AVG_LOSS: 0.00199 / MODE: explore
EPISODE:   1600 / TIMESTEP:   393940 / DURATION:   415 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.3480 / AVG_LOSS: 0.00195 / MODE: explore
EPISODE:   1601 / TIMESTEP:   394249 / DURATION:   309 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.3580 / AVG_LOSS: 0.00192 / MODE: explore
EPISODE:   1602 / TIMESTEP:   394475 / DURATION:   226 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   1651 / TIMESTEP:   406893 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3689 / AVG_LOSS: 0.00220 / MODE: explore
EPISODE:   1652 / TIMESTEP:   407048 / DURATION:   155 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3615 / AVG_LOSS: 0.00276 / MODE: explore
EPISODE:   1653 / TIMESTEP:   407369 / DURATION:   321 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3729 / AVG_LOSS: 0.00327 / MODE: explore
EPISODE:   1654 / TIMESTEP:   407631 / DURATION:   262 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3669 / AVG_LOSS: 0.00304 / MODE: explore
EPISODE:   1655 / TIMESTEP:   407812 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3702 / AVG_LOSS: 0.00399 / MODE: explore
EPISODE:   1656 / TIMESTEP:   408080 / DURATION:   268 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3709 / AVG_LOSS: 0.00469 / MODE: explore
EPISODE:   1657 / TIMESTEP:   408336 / DURATION:   256 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   1706 / TIMESTEP:   420790 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3916 / AVG_LOSS: 0.00395 / MODE: explore
EPISODE:   1707 / TIMESTEP:   421157 / DURATION:   367 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.3894 / AVG_LOSS: 0.00297 / MODE: explore
EPISODE:   1708 / TIMESTEP:   421327 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3895 / AVG_LOSS: 0.00393 / MODE: explore
EPISODE:   1709 / TIMESTEP:   421508 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3916 / AVG_LOSS: 0.00351 / MODE: explore
EPISODE:   1710 / TIMESTEP:   421680 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3854 / AVG_LOSS: 0.00283 / MODE: explore
EPISODE:   1711 / TIMESTEP:   421951 / DURATION:   271 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.3869 / AVG_LOSS: 0.00262 / MODE: explore
EPISODE:   1712 / TIMESTEP:   422345 / DURATION:   394 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

EPISODE:   1761 / TIMESTEP:   434262 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3845 / AVG_LOSS: 0.00203 / MODE: explore
EPISODE:   1762 / TIMESTEP:   434418 / DURATION:   156 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3870 / AVG_LOSS: 0.00215 / MODE: explore
EPISODE:   1763 / TIMESTEP:   434656 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.3914 / AVG_LOSS: 0.00343 / MODE: explore
EPISODE:   1764 / TIMESTEP:   434996 / DURATION:   340 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.3951 / AVG_LOSS: 0.00217 / MODE: explore
EPISODE:   1765 / TIMESTEP:   435168 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3923 / AVG_LOSS: 0.00251 / MODE: explore
EPISODE:   1766 / TIMESTEP:   435480 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.3874 / AVG_LOSS: 0.00295 / MODE: explore
EPISODE:   1767 / TIMESTEP:   435740 / DURATION:   260 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   1816 / TIMESTEP:   447564 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4052 / AVG_LOSS: 0.00379 / MODE: explore
EPISODE:   1817 / TIMESTEP:   447987 / DURATION:   423 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.3969 / AVG_LOSS: 0.00234 / MODE: explore
EPISODE:   1818 / TIMESTEP:   448196 / DURATION:   209 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4052 / AVG_LOSS: 0.00165 / MODE: explore
EPISODE:   1819 / TIMESTEP:   448381 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4019 / AVG_LOSS: 0.00173 / MODE: explore
EPISODE:   1820 / TIMESTEP:   448553 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.3964 / AVG_LOSS: 0.00166 / MODE: explore
EPISODE:   1821 / TIMESTEP:   448846 / DURATION:   293 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4110 / AVG_LOSS: 0.00322 / MODE: explore
EPISODE:   1822 / TIMESTEP:   449119 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   1871 / TIMESTEP:   461763 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4144 / AVG_LOSS: 0.00267 / MODE: explore
EPISODE:   1872 / TIMESTEP:   462009 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4090 / AVG_LOSS: 0.00296 / MODE: explore
EPISODE:   1873 / TIMESTEP:   462213 / DURATION:   204 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4082 / AVG_LOSS: 0.00190 / MODE: explore
EPISODE:   1874 / TIMESTEP:   462513 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4117 / AVG_LOSS: 0.00298 / MODE: explore
EPISODE:   1875 / TIMESTEP:   462677 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4134 / AVG_LOSS: 0.00408 / MODE: explore
EPISODE:   1876 / TIMESTEP:   462925 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4189 / AVG_LOSS: 0.00206 / MODE: explore
EPISODE:   1877 / TIMESTEP:   463093 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1926 / TIMESTEP:   475224 / DURATION:   405 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.4096 / AVG_LOSS: 0.00254 / MODE: explore
EPISODE:   1927 / TIMESTEP:   475519 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4176 / AVG_LOSS: 0.00214 / MODE: explore
EPISODE:   1928 / TIMESTEP:   475689 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4172 / AVG_LOSS: 0.00127 / MODE: explore
EPISODE:   1929 / TIMESTEP:   476097 / DURATION:   408 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.4185 / AVG_LOSS: 0.00227 / MODE: explore
EPISODE:   1930 / TIMESTEP:   476347 / DURATION:   250 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4236 / AVG_LOSS: 0.00452 / MODE: explore
EPISODE:   1931 / TIMESTEP:   476686 / DURATION:   339 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.4176 / AVG_LOSS: 0.00385 / MODE: explore
EPISODE:   1932 / TIMESTEP:   476866 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   1981 / TIMESTEP:   488535 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4189 / AVG_LOSS: 0.00264 / MODE: explore
EPISODE:   1982 / TIMESTEP:   488715 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4196 / AVG_LOSS: 0.00412 / MODE: explore
EPISODE:   1983 / TIMESTEP:   488886 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4183 / AVG_LOSS: 0.00324 / MODE: explore
EPISODE:   1984 / TIMESTEP:   489075 / DURATION:   189 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4195 / AVG_LOSS: 0.00274 / MODE: explore
EPISODE:   1985 / TIMESTEP:   489249 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4216 / AVG_LOSS: 0.00350 / MODE: explore
EPISODE:   1986 / TIMESTEP:   489531 / DURATION:   282 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4239 / AVG_LOSS: 0.00280 / MODE: explore
EPISODE:   1987 / TIMESTEP:   489810 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   2036 / TIMESTEP:   501185 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4283 / AVG_LOSS: 0.00213 / MODE: explore
EPISODE:   2037 / TIMESTEP:   501370 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4277 / AVG_LOSS: 0.00341 / MODE: explore
EPISODE:   2038 / TIMESTEP:   501645 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4391 / AVG_LOSS: 0.00414 / MODE: explore
EPISODE:   2039 / TIMESTEP:   501865 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4292 / AVG_LOSS: 0.00212 / MODE: explore
EPISODE:   2040 / TIMESTEP:   502096 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4318 / AVG_LOSS: 0.00352 / MODE: explore
EPISODE:   2041 / TIMESTEP:   502413 / DURATION:   317 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.4422 / AVG_LOSS: 0.00420 / MODE: explore
EPISODE:   2042 / TIMESTEP:   502659 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   2091 / TIMESTEP:   515172 / DURATION:   202 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4411 / AVG_LOSS: 0.00467 / MODE: explore
EPISODE:   2092 / TIMESTEP:   515360 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4322 / AVG_LOSS: 0.00229 / MODE: explore
EPISODE:   2093 / TIMESTEP:   515655 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4314 / AVG_LOSS: 0.00300 / MODE: explore
EPISODE:   2094 / TIMESTEP:   515834 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4273 / AVG_LOSS: 0.00148 / MODE: explore
EPISODE:   2095 / TIMESTEP:   516004 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4313 / AVG_LOSS: 0.00236 / MODE: explore
EPISODE:   2096 / TIMESTEP:   516190 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4288 / AVG_LOSS: 0.00287 / MODE: explore
EPISODE:   2097 / TIMESTEP:   516367 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2146 / TIMESTEP:   527508 / DURATION:   325 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.4449 / AVG_LOSS: 0.00268 / MODE: explore
EPISODE:   2147 / TIMESTEP:   527752 / DURATION:   244 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4354 / AVG_LOSS: 0.00227 / MODE: explore
EPISODE:   2148 / TIMESTEP:   527960 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4388 / AVG_LOSS: 0.00130 / MODE: explore
EPISODE:   2149 / TIMESTEP:   528239 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4283 / AVG_LOSS: 0.00254 / MODE: explore
EPISODE:   2150 / TIMESTEP:   528486 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4401 / AVG_LOSS: 0.00155 / MODE: explore
EPISODE:   2151 / TIMESTEP:   528990 / DURATION:   504 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_MAX_Q: 0.4409 / AVG_LOSS: 0.00269 / MODE: explore
EPISODE:   2152 / TIMESTEP:   529280 / DURATION:   290 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   2201 / TIMESTEP:   541377 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4512 / AVG_LOSS: 0.00416 / MODE: explore
EPISODE:   2202 / TIMESTEP:   541647 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4483 / AVG_LOSS: 0.00200 / MODE: explore
EPISODE:   2203 / TIMESTEP:   541822 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4524 / AVG_LOSS: 0.00132 / MODE: explore
EPISODE:   2204 / TIMESTEP:   542032 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4228 / AVG_LOSS: 0.00207 / MODE: explore
EPISODE:   2205 / TIMESTEP:   542265 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4582 / AVG_LOSS: 0.00318 / MODE: explore
EPISODE:   2206 / TIMESTEP:   542452 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4578 / AVG_LOSS: 0.00340 / MODE: explore
EPISODE:   2207 / TIMESTEP:   542626 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2256 / TIMESTEP:   554493 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4569 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:   2257 / TIMESTEP:   554782 / DURATION:   289 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4623 / AVG_LOSS: 0.00240 / MODE: explore
EPISODE:   2258 / TIMESTEP:   554946 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4560 / AVG_LOSS: 0.00224 / MODE: explore
EPISODE:   2259 / TIMESTEP:   555257 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4680 / AVG_LOSS: 0.00301 / MODE: explore
EPISODE:   2260 / TIMESTEP:   555428 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4557 / AVG_LOSS: 0.00181 / MODE: explore
EPISODE:   2261 / TIMESTEP:   555657 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4634 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:   2262 / TIMESTEP:   555843 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2311 / TIMESTEP:   567594 / DURATION:   354 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.4661 / AVG_LOSS: 0.00356 / MODE: explore
EPISODE:   2312 / TIMESTEP:   567833 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4835 / AVG_LOSS: 0.00340 / MODE: explore
EPISODE:   2313 / TIMESTEP:   568016 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4635 / AVG_LOSS: 0.00351 / MODE: explore
EPISODE:   2314 / TIMESTEP:   568195 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4638 / AVG_LOSS: 0.00152 / MODE: explore
EPISODE:   2315 / TIMESTEP:   568562 / DURATION:   367 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.4725 / AVG_LOSS: 0.00392 / MODE: explore
EPISODE:   2316 / TIMESTEP:   568805 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4760 / AVG_LOSS: 0.00221 / MODE: explore
EPISODE:   2317 / TIMESTEP:   569042 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   2366 / TIMESTEP:   580535 / DURATION:   194 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4896 / AVG_LOSS: 0.00387 / MODE: explore
EPISODE:   2367 / TIMESTEP:   580949 / DURATION:   414 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.4974 / AVG_LOSS: 0.00297 / MODE: explore
EPISODE:   2368 / TIMESTEP:   581252 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5069 / AVG_LOSS: 0.00328 / MODE: explore
EPISODE:   2369 / TIMESTEP:   581531 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5093 / AVG_LOSS: 0.00338 / MODE: explore
EPISODE:   2370 / TIMESTEP:   581837 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4643 / AVG_LOSS: 0.00339 / MODE: explore
EPISODE:   2371 / TIMESTEP:   582067 / DURATION:   230 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5203 / AVG_LOSS: 0.00283 / MODE: explore
EPISODE:   2372 / TIMESTEP:   582237 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2421 / TIMESTEP:   593939 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4675 / AVG_LOSS: 0.00255 / MODE: explore
EPISODE:   2422 / TIMESTEP:   594144 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5041 / AVG_LOSS: 0.00484 / MODE: explore
EPISODE:   2423 / TIMESTEP:   594309 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4978 / AVG_LOSS: 0.00496 / MODE: explore
EPISODE:   2424 / TIMESTEP:   594484 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5050 / AVG_LOSS: 0.00406 / MODE: explore
EPISODE:   2425 / TIMESTEP:   594647 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5002 / AVG_LOSS: 0.00347 / MODE: explore
EPISODE:   2426 / TIMESTEP:   594942 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4819 / AVG_LOSS: 0.00298 / MODE: explore
EPISODE:   2427 / TIMESTEP:   595456 / DURATION:   514 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_

EPISODE:   2476 / TIMESTEP:   607611 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.5083 / AVG_LOSS: 0.00220 / MODE: explore
EPISODE:   2477 / TIMESTEP:   607794 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4949 / AVG_LOSS: 0.00279 / MODE: explore
EPISODE:   2478 / TIMESTEP:   608044 / DURATION:   250 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5102 / AVG_LOSS: 0.00313 / MODE: explore
EPISODE:   2479 / TIMESTEP:   608450 / DURATION:   406 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.4693 / AVG_LOSS: 0.00359 / MODE: explore
EPISODE:   2480 / TIMESTEP:   608738 / DURATION:   288 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4702 / AVG_LOSS: 0.00315 / MODE: explore
EPISODE:   2481 / TIMESTEP:   608917 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4915 / AVG_LOSS: 0.00195 / MODE: explore
EPISODE:   2482 / TIMESTEP:   609081 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2531 / TIMESTEP:   620977 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4969 / AVG_LOSS: 0.00235 / MODE: explore
EPISODE:   2532 / TIMESTEP:   621252 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.4689 / AVG_LOSS: 0.00164 / MODE: explore
EPISODE:   2533 / TIMESTEP:   621424 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4988 / AVG_LOSS: 0.00343 / MODE: explore
EPISODE:   2534 / TIMESTEP:   621729 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5294 / AVG_LOSS: 0.00359 / MODE: explore
EPISODE:   2535 / TIMESTEP:   621908 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4975 / AVG_LOSS: 0.00256 / MODE: explore
EPISODE:   2536 / TIMESTEP:   622095 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4985 / AVG_LOSS: 0.00158 / MODE: explore
EPISODE:   2537 / TIMESTEP:   622367 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   2586 / TIMESTEP:   635175 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5089 / AVG_LOSS: 0.00356 / MODE: explore
EPISODE:   2587 / TIMESTEP:   635426 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5183 / AVG_LOSS: 0.00224 / MODE: explore
EPISODE:   2588 / TIMESTEP:   635722 / DURATION:   296 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5140 / AVG_LOSS: 0.00279 / MODE: explore
EPISODE:   2589 / TIMESTEP:   636031 / DURATION:   309 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.5201 / AVG_LOSS: 0.00176 / MODE: explore
EPISODE:   2590 / TIMESTEP:   636207 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.4997 / AVG_LOSS: 0.00324 / MODE: explore
EPISODE:   2591 / TIMESTEP:   636436 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5168 / AVG_LOSS: 0.00276 / MODE: explore
EPISODE:   2592 / TIMESTEP:   636599 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2641 / TIMESTEP:   649274 / DURATION:   221 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5384 / AVG_LOSS: 0.00355 / MODE: explore
EPISODE:   2642 / TIMESTEP:   649504 / DURATION:   230 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4958 / AVG_LOSS: 0.00268 / MODE: explore
EPISODE:   2643 / TIMESTEP:   649812 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.5493 / AVG_LOSS: 0.00249 / MODE: explore
EPISODE:   2644 / TIMESTEP:   650049 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5592 / AVG_LOSS: 0.00270 / MODE: explore
EPISODE:   2645 / TIMESTEP:   650282 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5699 / AVG_LOSS: 0.00355 / MODE: explore
EPISODE:   2646 / TIMESTEP:   650517 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5537 / AVG_LOSS: 0.00230 / MODE: explore
EPISODE:   2647 / TIMESTEP:   650758 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   2696 / TIMESTEP:   662845 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5783 / AVG_LOSS: 0.00396 / MODE: explore
EPISODE:   2697 / TIMESTEP:   663119 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5026 / AVG_LOSS: 0.00408 / MODE: explore
EPISODE:   2698 / TIMESTEP:   663299 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5302 / AVG_LOSS: 0.00372 / MODE: explore
EPISODE:   2699 / TIMESTEP:   663598 / DURATION:   299 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5411 / AVG_LOSS: 0.00325 / MODE: explore
EPISODE:   2700 / TIMESTEP:   663750 / DURATION:   152 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5306 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:   2701 / TIMESTEP:   663922 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5350 / AVG_LOSS: 0.00418 / MODE: explore
EPISODE:   2702 / TIMESTEP:   664086 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   2751 / TIMESTEP:   676459 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5221 / AVG_LOSS: 0.00436 / MODE: explore
EPISODE:   2752 / TIMESTEP:   676697 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4930 / AVG_LOSS: 0.00356 / MODE: explore
EPISODE:   2753 / TIMESTEP:   676944 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.4956 / AVG_LOSS: 0.00248 / MODE: explore
EPISODE:   2754 / TIMESTEP:   677119 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5280 / AVG_LOSS: 0.00293 / MODE: explore
EPISODE:   2755 / TIMESTEP:   677417 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5457 / AVG_LOSS: 0.00284 / MODE: explore
EPISODE:   2756 / TIMESTEP:   677594 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5347 / AVG_LOSS: 0.00389 / MODE: explore
EPISODE:   2757 / TIMESTEP:   677918 / DURATION:   324 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   2806 / TIMESTEP:   689773 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5606 / AVG_LOSS: 0.00280 / MODE: explore
EPISODE:   2807 / TIMESTEP:   689946 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5420 / AVG_LOSS: 0.00267 / MODE: explore
EPISODE:   2808 / TIMESTEP:   690246 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5948 / AVG_LOSS: 0.00295 / MODE: explore
EPISODE:   2809 / TIMESTEP:   690426 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5565 / AVG_LOSS: 0.00312 / MODE: explore
EPISODE:   2810 / TIMESTEP:   690806 / DURATION:   380 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.6102 / AVG_LOSS: 0.00334 / MODE: explore
EPISODE:   2811 / TIMESTEP:   691111 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.6018 / AVG_LOSS: 0.00259 / MODE: explore
EPISODE:   2812 / TIMESTEP:   691496 / DURATION:   385 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   2861 / TIMESTEP:   702827 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5859 / AVG_LOSS: 0.00239 / MODE: explore
EPISODE:   2862 / TIMESTEP:   703173 / DURATION:   346 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.5887 / AVG_LOSS: 0.00235 / MODE: explore
EPISODE:   2863 / TIMESTEP:   703407 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5828 / AVG_LOSS: 0.00338 / MODE: explore
EPISODE:   2864 / TIMESTEP:   703645 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6053 / AVG_LOSS: 0.00216 / MODE: explore
EPISODE:   2865 / TIMESTEP:   703948 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5964 / AVG_LOSS: 0.00259 / MODE: explore
EPISODE:   2866 / TIMESTEP:   704122 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5534 / AVG_LOSS: 0.00137 / MODE: explore
EPISODE:   2867 / TIMESTEP:   704368 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   2916 / TIMESTEP:   716201 / DURATION:   260 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5922 / AVG_LOSS: 0.00236 / MODE: explore
EPISODE:   2917 / TIMESTEP:   716497 / DURATION:   296 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6000 / AVG_LOSS: 0.00152 / MODE: explore
EPISODE:   2918 / TIMESTEP:   716666 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5605 / AVG_LOSS: 0.00261 / MODE: explore
EPISODE:   2919 / TIMESTEP:   716873 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5461 / AVG_LOSS: 0.00331 / MODE: explore
EPISODE:   2920 / TIMESTEP:   717264 / DURATION:   391 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.6125 / AVG_LOSS: 0.00296 / MODE: explore
EPISODE:   2921 / TIMESTEP:   717733 / DURATION:   469 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.6326 / AVG_LOSS: 0.00241 / MODE: explore
EPISODE:   2922 / TIMESTEP:   717991 / DURATION:   258 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   2971 / TIMESTEP:   728912 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5725 / AVG_LOSS: 0.00223 / MODE: explore
EPISODE:   2972 / TIMESTEP:   729188 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6212 / AVG_LOSS: 0.00248 / MODE: explore
EPISODE:   2973 / TIMESTEP:   729370 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5675 / AVG_LOSS: 0.00309 / MODE: explore
EPISODE:   2974 / TIMESTEP:   729546 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5659 / AVG_LOSS: 0.00189 / MODE: explore
EPISODE:   2975 / TIMESTEP:   729767 / DURATION:   221 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5903 / AVG_LOSS: 0.00198 / MODE: explore
EPISODE:   2976 / TIMESTEP:   730051 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6093 / AVG_LOSS: 0.00348 / MODE: explore
EPISODE:   2977 / TIMESTEP:   730334 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   3026 / TIMESTEP:   742567 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5458 / AVG_LOSS: 0.00293 / MODE: explore
EPISODE:   3027 / TIMESTEP:   742900 / DURATION:   333 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.5697 / AVG_LOSS: 0.00436 / MODE: explore
EPISODE:   3028 / TIMESTEP:   743075 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5284 / AVG_LOSS: 0.00252 / MODE: explore
EPISODE:   3029 / TIMESTEP:   743299 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5517 / AVG_LOSS: 0.00378 / MODE: explore
EPISODE:   3030 / TIMESTEP:   743599 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.5762 / AVG_LOSS: 0.00196 / MODE: explore
EPISODE:   3031 / TIMESTEP:   743855 / DURATION:   256 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5668 / AVG_LOSS: 0.00272 / MODE: explore
EPISODE:   3032 / TIMESTEP:   744011 / DURATION:   156 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   3081 / TIMESTEP:   756641 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5449 / AVG_LOSS: 0.00248 / MODE: explore
EPISODE:   3082 / TIMESTEP:   756869 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6087 / AVG_LOSS: 0.00246 / MODE: explore
EPISODE:   3083 / TIMESTEP:   757080 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.5622 / AVG_LOSS: 0.00330 / MODE: explore
EPISODE:   3084 / TIMESTEP:   757423 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.6214 / AVG_LOSS: 0.00220 / MODE: explore
EPISODE:   3085 / TIMESTEP:   757648 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5825 / AVG_LOSS: 0.00299 / MODE: explore
EPISODE:   3086 / TIMESTEP:   757858 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5335 / AVG_LOSS: 0.00144 / MODE: explore
EPISODE:   3087 / TIMESTEP:   758043 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   3136 / TIMESTEP:   769333 / DURATION:   320 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.6435 / AVG_LOSS: 0.00221 / MODE: explore
EPISODE:   3137 / TIMESTEP:   769569 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6344 / AVG_LOSS: 0.00328 / MODE: explore
EPISODE:   3138 / TIMESTEP:   769781 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6150 / AVG_LOSS: 0.00250 / MODE: explore
EPISODE:   3139 / TIMESTEP:   769994 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.5775 / AVG_LOSS: 0.00260 / MODE: explore
EPISODE:   3140 / TIMESTEP:   770169 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6170 / AVG_LOSS: 0.00343 / MODE: explore
EPISODE:   3141 / TIMESTEP:   770345 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6190 / AVG_LOSS: 0.00285 / MODE: explore
EPISODE:   3142 / TIMESTEP:   770590 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   3191 / TIMESTEP:   782737 / DURATION:   430 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.6165 / AVG_LOSS: 0.00294 / MODE: explore
EPISODE:   3192 / TIMESTEP:   782976 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6578 / AVG_LOSS: 0.00245 / MODE: explore
EPISODE:   3193 / TIMESTEP:   783277 / DURATION:   301 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6142 / AVG_LOSS: 0.00303 / MODE: explore
EPISODE:   3194 / TIMESTEP:   783543 / DURATION:   266 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6713 / AVG_LOSS: 0.00306 / MODE: explore
EPISODE:   3195 / TIMESTEP:   783848 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6543 / AVG_LOSS: 0.00317 / MODE: explore
EPISODE:   3196 / TIMESTEP:   784252 / DURATION:   404 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.5778 / AVG_LOSS: 0.00257 / MODE: explore
EPISODE:   3197 / TIMESTEP:   784484 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   3246 / TIMESTEP:   796555 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6676 / AVG_LOSS: 0.00230 / MODE: explore
EPISODE:   3247 / TIMESTEP:   796792 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6683 / AVG_LOSS: 0.00320 / MODE: explore
EPISODE:   3248 / TIMESTEP:   797008 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6589 / AVG_LOSS: 0.00197 / MODE: explore
EPISODE:   3249 / TIMESTEP:   797277 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6225 / AVG_LOSS: 0.00311 / MODE: explore
EPISODE:   3250 / TIMESTEP:   797458 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6226 / AVG_LOSS: 0.00288 / MODE: explore
EPISODE:   3251 / TIMESTEP:   797627 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6198 / AVG_LOSS: 0.00260 / MODE: explore
EPISODE:   3252 / TIMESTEP:   797886 / DURATION:   259 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   3301 / TIMESTEP:   810454 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6527 / AVG_LOSS: 0.00482 / MODE: explore
EPISODE:   3302 / TIMESTEP:   810718 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6647 / AVG_LOSS: 0.00311 / MODE: explore
EPISODE:   3303 / TIMESTEP:   810895 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6539 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:   3304 / TIMESTEP:   811382 / DURATION:   487 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_MAX_Q: 0.6955 / AVG_LOSS: 0.00304 / MODE: explore
EPISODE:   3305 / TIMESTEP:   811661 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7081 / AVG_LOSS: 0.00356 / MODE: explore
EPISODE:   3306 / TIMESTEP:   811845 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6556 / AVG_LOSS: 0.00323 / MODE: explore
EPISODE:   3307 / TIMESTEP:   812231 / DURATION:   386 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

EPISODE:   3356 / TIMESTEP:   823487 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6899 / AVG_LOSS: 0.00360 / MODE: explore
EPISODE:   3357 / TIMESTEP:   823794 / DURATION:   307 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6977 / AVG_LOSS: 0.00312 / MODE: explore
EPISODE:   3358 / TIMESTEP:   824050 / DURATION:   256 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7259 / AVG_LOSS: 0.00283 / MODE: explore
EPISODE:   3359 / TIMESTEP:   824288 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7119 / AVG_LOSS: 0.00267 / MODE: explore
EPISODE:   3360 / TIMESTEP:   824526 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6881 / AVG_LOSS: 0.00315 / MODE: explore
EPISODE:   3361 / TIMESTEP:   824694 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6714 / AVG_LOSS: 0.00293 / MODE: explore
EPISODE:   3362 / TIMESTEP:   825029 / DURATION:   335 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   3411 / TIMESTEP:   837312 / DURATION:   292 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7526 / AVG_LOSS: 0.00242 / MODE: explore
EPISODE:   3412 / TIMESTEP:   837555 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7648 / AVG_LOSS: 0.00262 / MODE: explore
EPISODE:   3413 / TIMESTEP:   837829 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7650 / AVG_LOSS: 0.00278 / MODE: explore
EPISODE:   3414 / TIMESTEP:   838011 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6723 / AVG_LOSS: 0.00311 / MODE: explore
EPISODE:   3415 / TIMESTEP:   838175 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6743 / AVG_LOSS: 0.00185 / MODE: explore
EPISODE:   3416 / TIMESTEP:   838487 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7805 / AVG_LOSS: 0.00314 / MODE: explore
EPISODE:   3417 / TIMESTEP:   838735 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   3466 / TIMESTEP:   850051 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6687 / AVG_LOSS: 0.00282 / MODE: explore
EPISODE:   3467 / TIMESTEP:   850258 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6480 / AVG_LOSS: 0.00332 / MODE: explore
EPISODE:   3468 / TIMESTEP:   850478 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6737 / AVG_LOSS: 0.00222 / MODE: explore
EPISODE:   3469 / TIMESTEP:   850691 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6930 / AVG_LOSS: 0.00357 / MODE: explore
EPISODE:   3470 / TIMESTEP:   850907 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6508 / AVG_LOSS: 0.00281 / MODE: explore
EPISODE:   3471 / TIMESTEP:   851073 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6598 / AVG_LOSS: 0.00371 / MODE: explore
EPISODE:   3472 / TIMESTEP:   851251 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   3521 / TIMESTEP:   863417 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7110 / AVG_LOSS: 0.00343 / MODE: explore
EPISODE:   3522 / TIMESTEP:   863600 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7020 / AVG_LOSS: 0.00295 / MODE: explore
EPISODE:   3523 / TIMESTEP:   863943 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.7976 / AVG_LOSS: 0.00271 / MODE: explore
EPISODE:   3524 / TIMESTEP:   864189 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7163 / AVG_LOSS: 0.00312 / MODE: explore
EPISODE:   3525 / TIMESTEP:   864384 / DURATION:   195 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7049 / AVG_LOSS: 0.00412 / MODE: explore
EPISODE:   3526 / TIMESTEP:   864595 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7479 / AVG_LOSS: 0.00254 / MODE: explore
EPISODE:   3527 / TIMESTEP:   864869 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   3576 / TIMESTEP:   877264 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7470 / AVG_LOSS: 0.00321 / MODE: explore
EPISODE:   3577 / TIMESTEP:   877639 / DURATION:   375 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.8175 / AVG_LOSS: 0.00327 / MODE: explore
EPISODE:   3578 / TIMESTEP:   877855 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7691 / AVG_LOSS: 0.00269 / MODE: explore
EPISODE:   3579 / TIMESTEP:   878289 / DURATION:   434 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.8059 / AVG_LOSS: 0.00369 / MODE: explore
EPISODE:   3580 / TIMESTEP:   878536 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8041 / AVG_LOSS: 0.00312 / MODE: explore
EPISODE:   3581 / TIMESTEP:   878879 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8260 / AVG_LOSS: 0.00314 / MODE: explore
EPISODE:   3582 / TIMESTEP:   879190 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   3631 / TIMESTEP:   890629 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7024 / AVG_LOSS: 0.00318 / MODE: explore
EPISODE:   3632 / TIMESTEP:   891033 / DURATION:   404 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.6750 / AVG_LOSS: 0.00258 / MODE: explore
EPISODE:   3633 / TIMESTEP:   891235 / DURATION:   202 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7045 / AVG_LOSS: 0.00279 / MODE: explore
EPISODE:   3634 / TIMESTEP:   891481 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7286 / AVG_LOSS: 0.00308 / MODE: explore
EPISODE:   3635 / TIMESTEP:   891697 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7581 / AVG_LOSS: 0.00467 / MODE: explore
EPISODE:   3636 / TIMESTEP:   891888 / DURATION:   191 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6937 / AVG_LOSS: 0.00362 / MODE: explore
EPISODE:   3637 / TIMESTEP:   892065 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   3686 / TIMESTEP:   904009 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7105 / AVG_LOSS: 0.00343 / MODE: explore
EPISODE:   3687 / TIMESTEP:   904244 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6941 / AVG_LOSS: 0.00282 / MODE: explore
EPISODE:   3688 / TIMESTEP:   904501 / DURATION:   257 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7516 / AVG_LOSS: 0.00359 / MODE: explore
EPISODE:   3689 / TIMESTEP:   904817 / DURATION:   316 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7813 / AVG_LOSS: 0.00405 / MODE: explore
EPISODE:   3690 / TIMESTEP:   905037 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7793 / AVG_LOSS: 0.00349 / MODE: explore
EPISODE:   3691 / TIMESTEP:   905238 / DURATION:   201 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7552 / AVG_LOSS: 0.00467 / MODE: explore
EPISODE:   3692 / TIMESTEP:   905411 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   3741 / TIMESTEP:   917796 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7399 / AVG_LOSS: 0.00308 / MODE: explore
EPISODE:   3742 / TIMESTEP:   917984 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7512 / AVG_LOSS: 0.00326 / MODE: explore
EPISODE:   3743 / TIMESTEP:   918153 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7351 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:   3744 / TIMESTEP:   918781 / DURATION:   628 / EPSILON: 1.00000 / TOTAL_REWARD:   9 / AVG_MAX_Q: 0.9416 / AVG_LOSS: 0.00350 / MODE: explore
EPISODE:   3745 / TIMESTEP:   919030 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8000 / AVG_LOSS: 0.00386 / MODE: explore
EPISODE:   3746 / TIMESTEP:   919264 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7739 / AVG_LOSS: 0.00282 / MODE: explore
EPISODE:   3747 / TIMESTEP:   919503 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   3796 / TIMESTEP:   931991 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7504 / AVG_LOSS: 0.00326 / MODE: explore
EPISODE:   3797 / TIMESTEP:   932314 / DURATION:   323 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9002 / AVG_LOSS: 0.00316 / MODE: explore
EPISODE:   3798 / TIMESTEP:   932522 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7247 / AVG_LOSS: 0.00430 / MODE: explore
EPISODE:   3799 / TIMESTEP:   932696 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7340 / AVG_LOSS: 0.00278 / MODE: explore
EPISODE:   3800 / TIMESTEP:   932995 / DURATION:   299 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7151 / AVG_LOSS: 0.00315 / MODE: explore
EPISODE:   3801 / TIMESTEP:   933237 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8682 / AVG_LOSS: 0.00278 / MODE: explore
EPISODE:   3802 / TIMESTEP:   933501 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   3851 / TIMESTEP:   945763 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7177 / AVG_LOSS: 0.00291 / MODE: explore
EPISODE:   3852 / TIMESTEP:   946002 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7892 / AVG_LOSS: 0.00287 / MODE: explore
EPISODE:   3853 / TIMESTEP:   946195 / DURATION:   193 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7131 / AVG_LOSS: 0.00306 / MODE: explore
EPISODE:   3854 / TIMESTEP:   946603 / DURATION:   408 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.8420 / AVG_LOSS: 0.00286 / MODE: explore
EPISODE:   3855 / TIMESTEP:   946913 / DURATION:   310 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6975 / AVG_LOSS: 0.00321 / MODE: explore
EPISODE:   3856 / TIMESTEP:   947089 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7236 / AVG_LOSS: 0.00274 / MODE: explore
EPISODE:   3857 / TIMESTEP:   947254 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   3906 / TIMESTEP:   958721 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7232 / AVG_LOSS: 0.00239 / MODE: explore
EPISODE:   3907 / TIMESTEP:   958930 / DURATION:   209 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7547 / AVG_LOSS: 0.00185 / MODE: explore
EPISODE:   3908 / TIMESTEP:   959196 / DURATION:   266 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8587 / AVG_LOSS: 0.00274 / MODE: explore
EPISODE:   3909 / TIMESTEP:   959476 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8487 / AVG_LOSS: 0.00274 / MODE: explore
EPISODE:   3910 / TIMESTEP:   959669 / DURATION:   193 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7786 / AVG_LOSS: 0.00280 / MODE: explore
EPISODE:   3911 / TIMESTEP:   959923 / DURATION:   254 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7699 / AVG_LOSS: 0.00210 / MODE: explore
EPISODE:   3912 / TIMESTEP:   960210 / DURATION:   287 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   3961 / TIMESTEP:   972794 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8314 / AVG_LOSS: 0.00266 / MODE: explore
EPISODE:   3962 / TIMESTEP:   972994 / DURATION:   200 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7691 / AVG_LOSS: 0.00266 / MODE: explore
EPISODE:   3963 / TIMESTEP:   973154 / DURATION:   160 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6839 / AVG_LOSS: 0.00213 / MODE: explore
EPISODE:   3964 / TIMESTEP:   973332 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7168 / AVG_LOSS: 0.00297 / MODE: explore
EPISODE:   3965 / TIMESTEP:   973640 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7513 / AVG_LOSS: 0.00242 / MODE: explore
EPISODE:   3966 / TIMESTEP:   973950 / DURATION:   310 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8826 / AVG_LOSS: 0.00335 / MODE: explore
EPISODE:   3967 / TIMESTEP:   974120 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   4016 / TIMESTEP:   986429 / DURATION:   268 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8199 / AVG_LOSS: 0.00337 / MODE: explore
EPISODE:   4017 / TIMESTEP:   986709 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7298 / AVG_LOSS: 0.00258 / MODE: explore
EPISODE:   4018 / TIMESTEP:   986888 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7115 / AVG_LOSS: 0.00333 / MODE: explore
EPISODE:   4019 / TIMESTEP:   987258 / DURATION:   370 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8927 / AVG_LOSS: 0.00289 / MODE: explore
EPISODE:   4020 / TIMESTEP:   987566 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9183 / AVG_LOSS: 0.00403 / MODE: explore
EPISODE:   4021 / TIMESTEP:   987734 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6902 / AVG_LOSS: 0.00162 / MODE: explore
EPISODE:   4022 / TIMESTEP:   988081 / DURATION:   347 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   4071 / TIMESTEP:  1000272 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6821 / AVG_LOSS: 0.00191 / MODE: explore
EPISODE:   4072 / TIMESTEP:  1000546 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8787 / AVG_LOSS: 0.00288 / MODE: explore
EPISODE:   4073 / TIMESTEP:  1000785 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7347 / AVG_LOSS: 0.00318 / MODE: explore
EPISODE:   4074 / TIMESTEP:  1001007 / DURATION:   222 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8874 / AVG_LOSS: 0.00356 / MODE: explore
EPISODE:   4075 / TIMESTEP:  1001416 / DURATION:   409 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9107 / AVG_LOSS: 0.00270 / MODE: explore
EPISODE:   4076 / TIMESTEP:  1001636 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7620 / AVG_LOSS: 0.00192 / MODE: explore
EPISODE:   4077 / TIMESTEP:  1001861 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   4126 / TIMESTEP:  1014142 / DURATION:   288 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8268 / AVG_LOSS: 0.00327 / MODE: explore
EPISODE:   4127 / TIMESTEP:  1014509 / DURATION:   367 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9150 / AVG_LOSS: 0.00263 / MODE: explore
EPISODE:   4128 / TIMESTEP:  1014682 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7031 / AVG_LOSS: 0.00173 / MODE: explore
EPISODE:   4129 / TIMESTEP:  1015057 / DURATION:   375 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.8297 / AVG_LOSS: 0.00193 / MODE: explore
EPISODE:   4130 / TIMESTEP:  1015224 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6782 / AVG_LOSS: 0.00222 / MODE: explore
EPISODE:   4131 / TIMESTEP:  1015439 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7683 / AVG_LOSS: 0.00290 / MODE: explore
EPISODE:   4132 / TIMESTEP:  1015610 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   4181 / TIMESTEP:  1026991 / DURATION:   253 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7867 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   4182 / TIMESTEP:  1027168 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7103 / AVG_LOSS: 0.00182 / MODE: exploit
EPISODE:   4183 / TIMESTEP:  1027353 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7088 / AVG_LOSS: 0.00246 / MODE: exploit
EPISODE:   4184 / TIMESTEP:  1027567 / DURATION:   214 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7458 / AVG_LOSS: 0.00217 / MODE: exploit
EPISODE:   4185 / TIMESTEP:  1027844 / DURATION:   277 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8320 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   4186 / TIMESTEP:  1028107 / DURATION:   263 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8788 / AVG_LOSS: 0.00300 / MODE: exploit
EPISODE:   4187 / TIMESTEP:  1028445 / DURATION:   338 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   4236 / TIMESTEP:  1040723 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7224 / AVG_LOSS: 0.00293 / MODE: exploit
EPISODE:   4237 / TIMESTEP:  1041035 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9271 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:   4238 / TIMESTEP:  1041213 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7241 / AVG_LOSS: 0.00239 / MODE: exploit
EPISODE:   4239 / TIMESTEP:  1041649 / DURATION:   436 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9441 / AVG_LOSS: 0.00305 / MODE: exploit
EPISODE:   4240 / TIMESTEP:  1041824 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7409 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   4241 / TIMESTEP:  1042053 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8984 / AVG_LOSS: 0.00249 / MODE: exploit
EPISODE:   4242 / TIMESTEP:  1042294 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   4291 / TIMESTEP:  1054316 / DURATION:   445 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.8103 / AVG_LOSS: 0.00302 / MODE: exploit
EPISODE:   4292 / TIMESTEP:  1054485 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7455 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   4293 / TIMESTEP:  1054758 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9365 / AVG_LOSS: 0.00225 / MODE: exploit
EPISODE:   4294 / TIMESTEP:  1055192 / DURATION:   434 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.9071 / AVG_LOSS: 0.00335 / MODE: exploit
EPISODE:   4295 / TIMESTEP:  1055369 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7149 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:   4296 / TIMESTEP:  1055684 / DURATION:   315 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8175 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:   4297 / TIMESTEP:  1055995 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   4346 / TIMESTEP:  1068589 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9275 / AVG_LOSS: 0.00357 / MODE: exploit
EPISODE:   4347 / TIMESTEP:  1068831 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8198 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   4348 / TIMESTEP:  1069140 / DURATION:   309 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8201 / AVG_LOSS: 0.00240 / MODE: exploit
EPISODE:   4349 / TIMESTEP:  1069419 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9382 / AVG_LOSS: 0.00187 / MODE: exploit
EPISODE:   4350 / TIMESTEP:  1069723 / DURATION:   304 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8045 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   4351 / TIMESTEP:  1069953 / DURATION:   230 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8136 / AVG_LOSS: 0.00252 / MODE: exploit
EPISODE:   4352 / TIMESTEP:  1070229 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   4401 / TIMESTEP:  1082124 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7166 / AVG_LOSS: 0.00341 / MODE: exploit
EPISODE:   4402 / TIMESTEP:  1082373 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8946 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   4403 / TIMESTEP:  1082696 / DURATION:   323 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8515 / AVG_LOSS: 0.00343 / MODE: exploit
EPISODE:   4404 / TIMESTEP:  1083007 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8149 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:   4405 / TIMESTEP:  1083192 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7273 / AVG_LOSS: 0.00338 / MODE: exploit
EPISODE:   4406 / TIMESTEP:  1083503 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8552 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:   4407 / TIMESTEP:  1083789 / DURATION:   286 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   4456 / TIMESTEP:  1095387 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6990 / AVG_LOSS: 0.00323 / MODE: exploit
EPISODE:   4457 / TIMESTEP:  1095626 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8100 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:   4458 / TIMESTEP:  1095797 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7158 / AVG_LOSS: 0.00217 / MODE: exploit
EPISODE:   4459 / TIMESTEP:  1096007 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6932 / AVG_LOSS: 0.00210 / MODE: exploit
EPISODE:   4460 / TIMESTEP:  1096345 / DURATION:   338 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8915 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   4461 / TIMESTEP:  1096620 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7976 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:   4462 / TIMESTEP:  1096799 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   4511 / TIMESTEP:  1108055 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7081 / AVG_LOSS: 0.00169 / MODE: exploit
EPISODE:   4512 / TIMESTEP:  1108236 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7281 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   4513 / TIMESTEP:  1108422 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7155 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   4514 / TIMESTEP:  1108599 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7052 / AVG_LOSS: 0.00261 / MODE: exploit
EPISODE:   4515 / TIMESTEP:  1108901 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7693 / AVG_LOSS: 0.00255 / MODE: exploit
EPISODE:   4516 / TIMESTEP:  1109132 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8546 / AVG_LOSS: 0.00226 / MODE: exploit
EPISODE:   4517 / TIMESTEP:  1109546 / DURATION:   414 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:   4566 / TIMESTEP:  1121848 / DURATION:   402 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.8309 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   4567 / TIMESTEP:  1122016 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7261 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:   4568 / TIMESTEP:  1122256 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8581 / AVG_LOSS: 0.00214 / MODE: exploit
EPISODE:   4569 / TIMESTEP:  1122429 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7181 / AVG_LOSS: 0.00206 / MODE: exploit
EPISODE:   4570 / TIMESTEP:  1122795 / DURATION:   366 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.7230 / AVG_LOSS: 0.00300 / MODE: exploit
EPISODE:   4571 / TIMESTEP:  1123123 / DURATION:   328 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8430 / AVG_LOSS: 0.00172 / MODE: exploit
EPISODE:   4572 / TIMESTEP:  1123482 / DURATION:   359 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

EPISODE:   4621 / TIMESTEP:  1134909 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7032 / AVG_LOSS: 0.00361 / MODE: exploit
EPISODE:   4622 / TIMESTEP:  1135103 / DURATION:   194 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7718 / AVG_LOSS: 0.00238 / MODE: exploit
EPISODE:   4623 / TIMESTEP:  1135332 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6992 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   4624 / TIMESTEP:  1135503 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7068 / AVG_LOSS: 0.00199 / MODE: exploit
EPISODE:   4625 / TIMESTEP:  1135679 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7113 / AVG_LOSS: 0.00196 / MODE: exploit
EPISODE:   4626 / TIMESTEP:  1135879 / DURATION:   200 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7968 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:   4627 / TIMESTEP:  1136113 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   4676 / TIMESTEP:  1148697 / DURATION:   277 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9159 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   4677 / TIMESTEP:  1148970 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9001 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:   4678 / TIMESTEP:  1149136 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7053 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   4679 / TIMESTEP:  1149414 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9206 / AVG_LOSS: 0.00262 / MODE: exploit
EPISODE:   4680 / TIMESTEP:  1149656 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8540 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   4681 / TIMESTEP:  1149993 / DURATION:   337 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8839 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:   4682 / TIMESTEP:  1150166 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   4731 / TIMESTEP:  1162096 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8452 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:   4732 / TIMESTEP:  1162515 / DURATION:   419 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.8671 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:   4733 / TIMESTEP:  1162701 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6997 / AVG_LOSS: 0.00213 / MODE: exploit
EPISODE:   4734 / TIMESTEP:  1162952 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7955 / AVG_LOSS: 0.00248 / MODE: exploit
EPISODE:   4735 / TIMESTEP:  1163131 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6864 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:   4736 / TIMESTEP:  1163318 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6926 / AVG_LOSS: 0.00217 / MODE: exploit
EPISODE:   4737 / TIMESTEP:  1163589 / DURATION:   271 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   4786 / TIMESTEP:  1176024 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6867 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:   4787 / TIMESTEP:  1176287 / DURATION:   263 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.6456 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   4788 / TIMESTEP:  1176466 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6872 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   4789 / TIMESTEP:  1176651 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6823 / AVG_LOSS: 0.00232 / MODE: exploit
EPISODE:   4790 / TIMESTEP:  1176846 / DURATION:   195 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6870 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:   4791 / TIMESTEP:  1177185 / DURATION:   339 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8795 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   4792 / TIMESTEP:  1177357 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   4841 / TIMESTEP:  1189871 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6504 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:   4842 / TIMESTEP:  1190049 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6640 / AVG_LOSS: 0.00223 / MODE: exploit
EPISODE:   4843 / TIMESTEP:  1190218 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6587 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:   4844 / TIMESTEP:  1190397 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6643 / AVG_LOSS: 0.00262 / MODE: exploit
EPISODE:   4845 / TIMESTEP:  1190634 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7179 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   4846 / TIMESTEP:  1190969 / DURATION:   335 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7492 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   4847 / TIMESTEP:  1191274 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   4896 / TIMESTEP:  1202662 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6843 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:   4897 / TIMESTEP:  1202836 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6763 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   4898 / TIMESTEP:  1203175 / DURATION:   339 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8229 / AVG_LOSS: 0.00350 / MODE: exploit
EPISODE:   4899 / TIMESTEP:  1203419 / DURATION:   244 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8432 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   4900 / TIMESTEP:  1203604 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6775 / AVG_LOSS: 0.00243 / MODE: exploit
EPISODE:   4901 / TIMESTEP:  1203772 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6947 / AVG_LOSS: 0.00335 / MODE: exploit
EPISODE:   4902 / TIMESTEP:  1204067 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   4951 / TIMESTEP:  1215948 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7330 / AVG_LOSS: 0.00250 / MODE: exploit
EPISODE:   4952 / TIMESTEP:  1216172 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7309 / AVG_LOSS: 0.00250 / MODE: exploit
EPISODE:   4953 / TIMESTEP:  1216462 / DURATION:   290 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8992 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   4954 / TIMESTEP:  1216637 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6685 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   4955 / TIMESTEP:  1216862 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7593 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   4956 / TIMESTEP:  1217027 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6580 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   4957 / TIMESTEP:  1217261 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   5006 / TIMESTEP:  1228913 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9269 / AVG_LOSS: 0.00393 / MODE: exploit
EPISODE:   5007 / TIMESTEP:  1229284 / DURATION:   371 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9156 / AVG_LOSS: 0.00315 / MODE: exploit
EPISODE:   5008 / TIMESTEP:  1229447 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6851 / AVG_LOSS: 0.00331 / MODE: exploit
EPISODE:   5009 / TIMESTEP:  1229751 / DURATION:   304 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7425 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:   5010 / TIMESTEP:  1230018 / DURATION:   267 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8485 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   5011 / TIMESTEP:  1230345 / DURATION:   327 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8919 / AVG_LOSS: 0.00335 / MODE: exploit
EPISODE:   5012 / TIMESTEP:  1230516 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   5061 / TIMESTEP:  1242589 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7041 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   5062 / TIMESTEP:  1242892 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8279 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:   5063 / TIMESTEP:  1243074 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6871 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:   5064 / TIMESTEP:  1243331 / DURATION:   257 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7892 / AVG_LOSS: 0.00261 / MODE: exploit
EPISODE:   5065 / TIMESTEP:  1243547 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8238 / AVG_LOSS: 0.00378 / MODE: exploit
EPISODE:   5066 / TIMESTEP:  1243715 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7014 / AVG_LOSS: 0.00339 / MODE: exploit
EPISODE:   5067 / TIMESTEP:  1243933 / DURATION:   218 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   5116 / TIMESTEP:  1255668 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6544 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:   5117 / TIMESTEP:  1255904 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7821 / AVG_LOSS: 0.00217 / MODE: exploit
EPISODE:   5118 / TIMESTEP:  1256069 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6823 / AVG_LOSS: 0.00248 / MODE: exploit
EPISODE:   5119 / TIMESTEP:  1256245 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6936 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:   5120 / TIMESTEP:  1256421 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6828 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   5121 / TIMESTEP:  1256751 / DURATION:   330 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0014 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   5122 / TIMESTEP:  1257030 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   5171 / TIMESTEP:  1269196 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7091 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   5172 / TIMESTEP:  1269394 / DURATION:   198 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8162 / AVG_LOSS: 0.00362 / MODE: exploit
EPISODE:   5173 / TIMESTEP:  1269667 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9095 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:   5174 / TIMESTEP:  1269877 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8988 / AVG_LOSS: 0.00307 / MODE: exploit
EPISODE:   5175 / TIMESTEP:  1270152 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8746 / AVG_LOSS: 0.00315 / MODE: exploit
EPISODE:   5176 / TIMESTEP:  1270416 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9507 / AVG_LOSS: 0.00308 / MODE: exploit
EPISODE:   5177 / TIMESTEP:  1270756 / DURATION:   340 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   5226 / TIMESTEP:  1284065 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9285 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:   5227 / TIMESTEP:  1284276 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7625 / AVG_LOSS: 0.00391 / MODE: exploit
EPISODE:   5228 / TIMESTEP:  1284444 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7041 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   5229 / TIMESTEP:  1284626 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6775 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:   5230 / TIMESTEP:  1284845 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8750 / AVG_LOSS: 0.00345 / MODE: exploit
EPISODE:   5231 / TIMESTEP:  1285071 / DURATION:   226 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8036 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   5232 / TIMESTEP:  1285261 / DURATION:   190 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   5281 / TIMESTEP:  1297162 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6930 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:   5282 / TIMESTEP:  1297364 / DURATION:   202 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7714 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   5283 / TIMESTEP:  1297591 / DURATION:   227 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7555 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   5284 / TIMESTEP:  1297791 / DURATION:   200 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8125 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:   5285 / TIMESTEP:  1298116 / DURATION:   325 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9851 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:   5286 / TIMESTEP:  1298281 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7063 / AVG_LOSS: 0.00331 / MODE: exploit
EPISODE:   5287 / TIMESTEP:  1298717 / DURATION:   436 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:   5336 / TIMESTEP:  1310195 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7143 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   5337 / TIMESTEP:  1310517 / DURATION:   322 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8458 / AVG_LOSS: 0.00261 / MODE: exploit
EPISODE:   5338 / TIMESTEP:  1310693 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6948 / AVG_LOSS: 0.00358 / MODE: exploit
EPISODE:   5339 / TIMESTEP:  1311074 / DURATION:   381 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.7419 / AVG_LOSS: 0.00249 / MODE: exploit
EPISODE:   5340 / TIMESTEP:  1311354 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8042 / AVG_LOSS: 0.00319 / MODE: exploit
EPISODE:   5341 / TIMESTEP:  1311605 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7724 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   5342 / TIMESTEP:  1311779 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   5391 / TIMESTEP:  1324354 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7875 / AVG_LOSS: 0.00188 / MODE: exploit
EPISODE:   5392 / TIMESTEP:  1324634 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9290 / AVG_LOSS: 0.00343 / MODE: exploit
EPISODE:   5393 / TIMESTEP:  1324933 / DURATION:   299 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0070 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:   5394 / TIMESTEP:  1325179 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7240 / AVG_LOSS: 0.00256 / MODE: exploit
EPISODE:   5395 / TIMESTEP:  1325355 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7129 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   5396 / TIMESTEP:  1325709 / DURATION:   354 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9989 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   5397 / TIMESTEP:  1326033 / DURATION:   324 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   5446 / TIMESTEP:  1338219 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9675 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   5447 / TIMESTEP:  1338393 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7112 / AVG_LOSS: 0.00233 / MODE: exploit
EPISODE:   5448 / TIMESTEP:  1338705 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7702 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   5449 / TIMESTEP:  1338888 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6841 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:   5450 / TIMESTEP:  1339051 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6985 / AVG_LOSS: 0.00186 / MODE: exploit
EPISODE:   5451 / TIMESTEP:  1339220 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7012 / AVG_LOSS: 0.00240 / MODE: exploit
EPISODE:   5452 / TIMESTEP:  1339532 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   5501 / TIMESTEP:  1351445 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7062 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   5502 / TIMESTEP:  1351720 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9010 / AVG_LOSS: 0.00241 / MODE: exploit
EPISODE:   5503 / TIMESTEP:  1352237 / DURATION:   517 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_MAX_Q: 1.0368 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   5504 / TIMESTEP:  1352472 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9690 / AVG_LOSS: 0.00325 / MODE: exploit
EPISODE:   5505 / TIMESTEP:  1352705 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9430 / AVG_LOSS: 0.00232 / MODE: exploit
EPISODE:   5506 / TIMESTEP:  1352979 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8670 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   5507 / TIMESTEP:  1353154 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   5556 / TIMESTEP:  1365420 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6930 / AVG_LOSS: 0.00300 / MODE: exploit
EPISODE:   5557 / TIMESTEP:  1365660 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8482 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   5558 / TIMESTEP:  1366027 / DURATION:   367 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9079 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:   5559 / TIMESTEP:  1366305 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9557 / AVG_LOSS: 0.00324 / MODE: exploit
EPISODE:   5560 / TIMESTEP:  1366510 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8210 / AVG_LOSS: 0.00202 / MODE: exploit
EPISODE:   5561 / TIMESTEP:  1366786 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9952 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:   5562 / TIMESTEP:  1367009 / DURATION:   223 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   5611 / TIMESTEP:  1378273 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7043 / AVG_LOSS: 0.00195 / MODE: exploit
EPISODE:   5612 / TIMESTEP:  1378547 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9653 / AVG_LOSS: 0.00237 / MODE: exploit
EPISODE:   5613 / TIMESTEP:  1378790 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7780 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   5614 / TIMESTEP:  1378974 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6927 / AVG_LOSS: 0.00204 / MODE: exploit
EPISODE:   5615 / TIMESTEP:  1379213 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7534 / AVG_LOSS: 0.00248 / MODE: exploit
EPISODE:   5616 / TIMESTEP:  1379414 / DURATION:   201 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8108 / AVG_LOSS: 0.00271 / MODE: exploit
EPISODE:   5617 / TIMESTEP:  1379784 / DURATION:   370 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   5666 / TIMESTEP:  1393015 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9390 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:   5667 / TIMESTEP:  1393265 / DURATION:   250 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8219 / AVG_LOSS: 0.00221 / MODE: exploit
EPISODE:   5668 / TIMESTEP:  1393515 / DURATION:   250 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7986 / AVG_LOSS: 0.00248 / MODE: exploit
EPISODE:   5669 / TIMESTEP:  1393784 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8688 / AVG_LOSS: 0.00252 / MODE: exploit
EPISODE:   5670 / TIMESTEP:  1394069 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9186 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   5671 / TIMESTEP:  1394236 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7347 / AVG_LOSS: 0.00368 / MODE: exploit
EPISODE:   5672 / TIMESTEP:  1394600 / DURATION:   364 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   5721 / TIMESTEP:  1406362 / DURATION:   334 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9913 / AVG_LOSS: 0.00305 / MODE: exploit
EPISODE:   5722 / TIMESTEP:  1406733 / DURATION:   371 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9432 / AVG_LOSS: 0.00216 / MODE: exploit
EPISODE:   5723 / TIMESTEP:  1406916 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6970 / AVG_LOSS: 0.00185 / MODE: exploit
EPISODE:   5724 / TIMESTEP:  1407154 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9334 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   5725 / TIMESTEP:  1407492 / DURATION:   338 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0318 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   5726 / TIMESTEP:  1407899 / DURATION:   407 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9250 / AVG_LOSS: 0.00227 / MODE: exploit
EPISODE:   5727 / TIMESTEP:  1408259 / DURATION:   360 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   5776 / TIMESTEP:  1420439 / DURATION:   291 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8365 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:   5777 / TIMESTEP:  1420610 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7478 / AVG_LOSS: 0.00237 / MODE: exploit
EPISODE:   5778 / TIMESTEP:  1420857 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8953 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   5779 / TIMESTEP:  1421023 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7594 / AVG_LOSS: 0.00271 / MODE: exploit
EPISODE:   5780 / TIMESTEP:  1421360 / DURATION:   337 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1080 / AVG_LOSS: 0.00253 / MODE: exploit
EPISODE:   5781 / TIMESTEP:  1421674 / DURATION:   314 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9975 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   5782 / TIMESTEP:  1421900 / DURATION:   226 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   5831 / TIMESTEP:  1434532 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6924 / AVG_LOSS: 0.00265 / MODE: exploit
EPISODE:   5832 / TIMESTEP:  1434815 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8390 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:   5833 / TIMESTEP:  1435068 / DURATION:   253 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8813 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   5834 / TIMESTEP:  1435412 / DURATION:   344 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9500 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   5835 / TIMESTEP:  1435731 / DURATION:   319 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0071 / AVG_LOSS: 0.00289 / MODE: exploit
EPISODE:   5836 / TIMESTEP:  1436082 / DURATION:   351 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9902 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:   5837 / TIMESTEP:  1436295 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   5886 / TIMESTEP:  1448231 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6956 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   5887 / TIMESTEP:  1448405 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7142 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   5888 / TIMESTEP:  1448680 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8810 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   5889 / TIMESTEP:  1448853 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7179 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:   5890 / TIMESTEP:  1449077 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9566 / AVG_LOSS: 0.00293 / MODE: exploit
EPISODE:   5891 / TIMESTEP:  1449228 / DURATION:   151 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6691 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   5892 / TIMESTEP:  1449459 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   5941 / TIMESTEP:  1461531 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0726 / AVG_LOSS: 0.00223 / MODE: exploit
EPISODE:   5942 / TIMESTEP:  1461699 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7004 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:   5943 / TIMESTEP:  1461970 / DURATION:   271 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0343 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   5944 / TIMESTEP:  1462140 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7093 / AVG_LOSS: 0.00220 / MODE: exploit
EPISODE:   5945 / TIMESTEP:  1462396 / DURATION:   256 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8225 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:   5946 / TIMESTEP:  1462702 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8275 / AVG_LOSS: 0.00308 / MODE: exploit
EPISODE:   5947 / TIMESTEP:  1462976 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   5996 / TIMESTEP:  1475415 / DURATION:   254 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8042 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   5997 / TIMESTEP:  1475588 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6705 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:   5998 / TIMESTEP:  1475758 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6895 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:   5999 / TIMESTEP:  1475940 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7184 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:   6000 / TIMESTEP:  1476179 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7502 / AVG_LOSS: 0.00238 / MODE: exploit
EPISODE:   6001 / TIMESTEP:  1476514 / DURATION:   335 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.7754 / AVG_LOSS: 0.00230 / MODE: exploit
EPISODE:   6002 / TIMESTEP:  1476680 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   6051 / TIMESTEP:  1488828 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7897 / AVG_LOSS: 0.00271 / MODE: exploit
EPISODE:   6052 / TIMESTEP:  1489000 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6853 / AVG_LOSS: 0.00262 / MODE: exploit
EPISODE:   6053 / TIMESTEP:  1489350 / DURATION:   350 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9747 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:   6054 / TIMESTEP:  1489592 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7644 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:   6055 / TIMESTEP:  1489772 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6752 / AVG_LOSS: 0.00218 / MODE: exploit
EPISODE:   6056 / TIMESTEP:  1489995 / DURATION:   223 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8240 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:   6057 / TIMESTEP:  1490170 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   6106 / TIMESTEP:  1502234 / DURATION:   372 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9530 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   6107 / TIMESTEP:  1502522 / DURATION:   288 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7777 / AVG_LOSS: 0.00289 / MODE: exploit
EPISODE:   6108 / TIMESTEP:  1502700 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6618 / AVG_LOSS: 0.00346 / MODE: exploit
EPISODE:   6109 / TIMESTEP:  1502901 / DURATION:   201 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7853 / AVG_LOSS: 0.00239 / MODE: exploit
EPISODE:   6110 / TIMESTEP:  1503219 / DURATION:   318 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.8167 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   6111 / TIMESTEP:  1503433 / DURATION:   214 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.6929 / AVG_LOSS: 0.00289 / MODE: exploit
EPISODE:   6112 / TIMESTEP:  1503609 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   6161 / TIMESTEP:  1516188 / DURATION:   203 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8327 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:   6162 / TIMESTEP:  1516480 / DURATION:   292 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9411 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   6163 / TIMESTEP:  1516662 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7021 / AVG_LOSS: 0.00190 / MODE: exploit
EPISODE:   6164 / TIMESTEP:  1516884 / DURATION:   222 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7581 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:   6165 / TIMESTEP:  1517160 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9328 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   6166 / TIMESTEP:  1517555 / DURATION:   395 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 0.9524 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   6167 / TIMESTEP:  1517866 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   6216 / TIMESTEP:  1528622 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6800 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   6217 / TIMESTEP:  1528932 / DURATION:   310 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8727 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:   6218 / TIMESTEP:  1529235 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8803 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:   6219 / TIMESTEP:  1529559 / DURATION:   324 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9597 / AVG_LOSS: 0.00234 / MODE: exploit
EPISODE:   6220 / TIMESTEP:  1529773 / DURATION:   214 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7517 / AVG_LOSS: 0.00238 / MODE: exploit
EPISODE:   6221 / TIMESTEP:  1530062 / DURATION:   289 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9794 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:   6222 / TIMESTEP:  1530334 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   6271 / TIMESTEP:  1542148 / DURATION:   351 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9380 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   6272 / TIMESTEP:  1542318 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7323 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:   6273 / TIMESTEP:  1542720 / DURATION:   402 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 0.9571 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:   6274 / TIMESTEP:  1543200 / DURATION:   480 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.1610 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:   6275 / TIMESTEP:  1543381 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6813 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   6276 / TIMESTEP:  1543671 / DURATION:   290 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8344 / AVG_LOSS: 0.00246 / MODE: exploit
EPISODE:   6277 / TIMESTEP:  1543856 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   6326 / TIMESTEP:  1555639 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6930 / AVG_LOSS: 0.00210 / MODE: exploit
EPISODE:   6327 / TIMESTEP:  1555814 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7036 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   6328 / TIMESTEP:  1555986 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6558 / AVG_LOSS: 0.00355 / MODE: exploit
EPISODE:   6329 / TIMESTEP:  1556288 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9253 / AVG_LOSS: 0.00232 / MODE: exploit
EPISODE:   6330 / TIMESTEP:  1556571 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0650 / AVG_LOSS: 0.00233 / MODE: exploit
EPISODE:   6331 / TIMESTEP:  1556795 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7779 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   6332 / TIMESTEP:  1557009 / DURATION:   214 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   6381 / TIMESTEP:  1568855 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7057 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   6382 / TIMESTEP:  1569072 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8767 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:   6383 / TIMESTEP:  1569241 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7308 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:   6384 / TIMESTEP:  1569422 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7036 / AVG_LOSS: 0.00300 / MODE: exploit
EPISODE:   6385 / TIMESTEP:  1569697 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8615 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   6386 / TIMESTEP:  1569905 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7947 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   6387 / TIMESTEP:  1570335 / DURATION:   430 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:   6436 / TIMESTEP:  1581931 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8088 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:   6437 / TIMESTEP:  1582103 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7192 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:   6438 / TIMESTEP:  1582335 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8443 / AVG_LOSS: 0.00302 / MODE: exploit
EPISODE:   6439 / TIMESTEP:  1582519 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7560 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   6440 / TIMESTEP:  1582891 / DURATION:   372 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.1983 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:   6441 / TIMESTEP:  1583191 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7308 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:   6442 / TIMESTEP:  1583428 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   6491 / TIMESTEP:  1594429 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7359 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   6492 / TIMESTEP:  1594668 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8183 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   6493 / TIMESTEP:  1594954 / DURATION:   286 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0118 / AVG_LOSS: 0.00224 / MODE: exploit
EPISODE:   6494 / TIMESTEP:  1595203 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9466 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   6495 / TIMESTEP:  1595379 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7263 / AVG_LOSS: 0.00200 / MODE: exploit
EPISODE:   6496 / TIMESTEP:  1595552 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6935 / AVG_LOSS: 0.00245 / MODE: exploit
EPISODE:   6497 / TIMESTEP:  1595786 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   6546 / TIMESTEP:  1607166 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.7997 / AVG_LOSS: 0.00261 / MODE: exploit
EPISODE:   6547 / TIMESTEP:  1607475 / DURATION:   309 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.7998 / AVG_LOSS: 0.00265 / MODE: exploit
EPISODE:   6548 / TIMESTEP:  1607773 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0679 / AVG_LOSS: 0.00263 / MODE: exploit
EPISODE:   6549 / TIMESTEP:  1608113 / DURATION:   340 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0516 / AVG_LOSS: 0.00252 / MODE: exploit
EPISODE:   6550 / TIMESTEP:  1608380 / DURATION:   267 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8684 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:   6551 / TIMESTEP:  1608552 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7150 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   6552 / TIMESTEP:  1608792 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   6601 / TIMESTEP:  1620910 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7488 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   6602 / TIMESTEP:  1621192 / DURATION:   282 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9738 / AVG_LOSS: 0.00271 / MODE: exploit
EPISODE:   6603 / TIMESTEP:  1621392 / DURATION:   200 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8877 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   6604 / TIMESTEP:  1621680 / DURATION:   288 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8202 / AVG_LOSS: 0.00318 / MODE: exploit
EPISODE:   6605 / TIMESTEP:  1621990 / DURATION:   310 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8877 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   6606 / TIMESTEP:  1622146 / DURATION:   156 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.6567 / AVG_LOSS: 0.00209 / MODE: exploit
EPISODE:   6607 / TIMESTEP:  1622340 / DURATION:   194 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   6656 / TIMESTEP:  1634074 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8859 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   6657 / TIMESTEP:  1634279 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9235 / AVG_LOSS: 0.00238 / MODE: exploit
EPISODE:   6658 / TIMESTEP:  1634497 / DURATION:   218 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8883 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   6659 / TIMESTEP:  1634687 / DURATION:   190 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7716 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   6660 / TIMESTEP:  1634857 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7313 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:   6661 / TIMESTEP:  1635225 / DURATION:   368 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1512 / AVG_LOSS: 0.00245 / MODE: exploit
EPISODE:   6662 / TIMESTEP:  1635401 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   6711 / TIMESTEP:  1646442 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7434 / AVG_LOSS: 0.00217 / MODE: exploit
EPISODE:   6712 / TIMESTEP:  1646624 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7491 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   6713 / TIMESTEP:  1646804 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7507 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:   6714 / TIMESTEP:  1647151 / DURATION:   347 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9521 / AVG_LOSS: 0.00273 / MODE: exploit
EPISODE:   6715 / TIMESTEP:  1647459 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9964 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:   6716 / TIMESTEP:  1647642 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7430 / AVG_LOSS: 0.00237 / MODE: exploit
EPISODE:   6717 / TIMESTEP:  1647880 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   6766 / TIMESTEP:  1659412 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0094 / AVG_LOSS: 0.00344 / MODE: exploit
EPISODE:   6767 / TIMESTEP:  1659862 / DURATION:   450 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.1150 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   6768 / TIMESTEP:  1660205 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 0.9958 / AVG_LOSS: 0.00262 / MODE: exploit
EPISODE:   6769 / TIMESTEP:  1660379 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7554 / AVG_LOSS: 0.00314 / MODE: exploit
EPISODE:   6770 / TIMESTEP:  1660569 / DURATION:   190 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7120 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:   6771 / TIMESTEP:  1660855 / DURATION:   286 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9717 / AVG_LOSS: 0.00237 / MODE: exploit
EPISODE:   6772 / TIMESTEP:  1661127 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   6821 / TIMESTEP:  1672822 / DURATION:   144 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7354 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:   6822 / TIMESTEP:  1673080 / DURATION:   258 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0859 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   6823 / TIMESTEP:  1673364 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0273 / AVG_LOSS: 0.00245 / MODE: exploit
EPISODE:   6824 / TIMESTEP:  1673586 / DURATION:   222 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8998 / AVG_LOSS: 0.00263 / MODE: exploit
EPISODE:   6825 / TIMESTEP:  1673756 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8140 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   6826 / TIMESTEP:  1674080 / DURATION:   324 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0546 / AVG_LOSS: 0.00244 / MODE: exploit
EPISODE:   6827 / TIMESTEP:  1674361 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   6876 / TIMESTEP:  1686797 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9275 / AVG_LOSS: 0.00235 / MODE: exploit
EPISODE:   6877 / TIMESTEP:  1687067 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8151 / AVG_LOSS: 0.00225 / MODE: exploit
EPISODE:   6878 / TIMESTEP:  1687275 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8498 / AVG_LOSS: 0.00239 / MODE: exploit
EPISODE:   6879 / TIMESTEP:  1687660 / DURATION:   385 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.0412 / AVG_LOSS: 0.00253 / MODE: exploit
EPISODE:   6880 / TIMESTEP:  1687965 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8523 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:   6881 / TIMESTEP:  1688261 / DURATION:   296 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.8901 / AVG_LOSS: 0.00263 / MODE: exploit
EPISODE:   6882 / TIMESTEP:  1688460 / DURATION:   199 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   6931 / TIMESTEP:  1701112 / DURATION:   145 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7509 / AVG_LOSS: 0.00226 / MODE: exploit
EPISODE:   6932 / TIMESTEP:  1701285 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8113 / AVG_LOSS: 0.00318 / MODE: exploit
EPISODE:   6933 / TIMESTEP:  1701553 / DURATION:   268 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0072 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   6934 / TIMESTEP:  1701836 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0680 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   6935 / TIMESTEP:  1702022 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7911 / AVG_LOSS: 0.00302 / MODE: exploit
EPISODE:   6936 / TIMESTEP:  1702411 / DURATION:   389 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.1497 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:   6937 / TIMESTEP:  1702655 / DURATION:   244 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   6986 / TIMESTEP:  1714277 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9972 / AVG_LOSS: 0.00338 / MODE: exploit
EPISODE:   6987 / TIMESTEP:  1714508 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0060 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:   6988 / TIMESTEP:  1714744 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9376 / AVG_LOSS: 0.00265 / MODE: exploit
EPISODE:   6989 / TIMESTEP:  1714998 / DURATION:   254 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9507 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   6990 / TIMESTEP:  1715175 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7821 / AVG_LOSS: 0.00211 / MODE: exploit
EPISODE:   6991 / TIMESTEP:  1715423 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0490 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   6992 / TIMESTEP:  1715824 / DURATION:   401 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

EPISODE:   7041 / TIMESTEP:  1727619 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7874 / AVG_LOSS: 0.00330 / MODE: exploit
EPISODE:   7042 / TIMESTEP:  1727967 / DURATION:   348 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0889 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   7043 / TIMESTEP:  1728147 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7784 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:   7044 / TIMESTEP:  1728374 / DURATION:   227 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1489 / AVG_LOSS: 0.00424 / MODE: exploit
EPISODE:   7045 / TIMESTEP:  1728554 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8080 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:   7046 / TIMESTEP:  1728764 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8762 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   7047 / TIMESTEP:  1728937 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7096 / TIMESTEP:  1740628 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9838 / AVG_LOSS: 0.00350 / MODE: exploit
EPISODE:   7097 / TIMESTEP:  1741062 / DURATION:   434 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.1089 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:   7098 / TIMESTEP:  1741338 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0644 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:   7099 / TIMESTEP:  1741509 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7884 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   7100 / TIMESTEP:  1741726 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9886 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:   7101 / TIMESTEP:  1741957 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0449 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:   7102 / TIMESTEP:  1742203 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   7151 / TIMESTEP:  1754447 / DURATION:   466 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.2075 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   7152 / TIMESTEP:  1754761 / DURATION:   314 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1963 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:   7153 / TIMESTEP:  1755098 / DURATION:   337 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0420 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:   7154 / TIMESTEP:  1755262 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9025 / AVG_LOSS: 0.00256 / MODE: exploit
EPISODE:   7155 / TIMESTEP:  1755567 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9497 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   7156 / TIMESTEP:  1755804 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9737 / AVG_LOSS: 0.00254 / MODE: exploit
EPISODE:   7157 / TIMESTEP:  1755974 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7206 / TIMESTEP:  1768600 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0773 / AVG_LOSS: 0.00254 / MODE: exploit
EPISODE:   7207 / TIMESTEP:  1768835 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9672 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:   7208 / TIMESTEP:  1769108 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0304 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   7209 / TIMESTEP:  1769321 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0138 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:   7210 / TIMESTEP:  1769742 / DURATION:   421 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.1967 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   7211 / TIMESTEP:  1770047 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1560 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   7212 / TIMESTEP:  1770230 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7261 / TIMESTEP:  1782831 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1353 / AVG_LOSS: 0.00295 / MODE: exploit
EPISODE:   7262 / TIMESTEP:  1783004 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7871 / AVG_LOSS: 0.00244 / MODE: exploit
EPISODE:   7263 / TIMESTEP:  1783180 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7314 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:   7264 / TIMESTEP:  1783385 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8753 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:   7265 / TIMESTEP:  1783616 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9472 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:   7266 / TIMESTEP:  1783787 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7749 / AVG_LOSS: 0.00340 / MODE: exploit
EPISODE:   7267 / TIMESTEP:  1784070 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   7316 / TIMESTEP:  1795451 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8963 / AVG_LOSS: 0.00354 / MODE: exploit
EPISODE:   7317 / TIMESTEP:  1795625 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7880 / AVG_LOSS: 0.00337 / MODE: exploit
EPISODE:   7318 / TIMESTEP:  1795797 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8590 / AVG_LOSS: 0.00315 / MODE: exploit
EPISODE:   7319 / TIMESTEP:  1796046 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8336 / AVG_LOSS: 0.00349 / MODE: exploit
EPISODE:   7320 / TIMESTEP:  1796390 / DURATION:   344 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1126 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:   7321 / TIMESTEP:  1796674 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1129 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   7322 / TIMESTEP:  1796937 / DURATION:   263 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   7371 / TIMESTEP:  1809466 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0233 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:   7372 / TIMESTEP:  1809709 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9075 / AVG_LOSS: 0.00335 / MODE: exploit
EPISODE:   7373 / TIMESTEP:  1809880 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8100 / AVG_LOSS: 0.00340 / MODE: exploit
EPISODE:   7374 / TIMESTEP:  1810167 / DURATION:   287 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0525 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   7375 / TIMESTEP:  1810342 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7878 / AVG_LOSS: 0.00343 / MODE: exploit
EPISODE:   7376 / TIMESTEP:  1810515 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8338 / AVG_LOSS: 0.00302 / MODE: exploit
EPISODE:   7377 / TIMESTEP:  1810695 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7426 / TIMESTEP:  1823644 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9169 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:   7427 / TIMESTEP:  1823983 / DURATION:   339 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2453 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:   7428 / TIMESTEP:  1824286 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0833 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:   7429 / TIMESTEP:  1824549 / DURATION:   263 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9813 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:   7430 / TIMESTEP:  1824755 / DURATION:   206 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9519 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   7431 / TIMESTEP:  1825029 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0028 / AVG_LOSS: 0.00324 / MODE: exploit
EPISODE:   7432 / TIMESTEP:  1825379 / DURATION:   350 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   7481 / TIMESTEP:  1837782 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1283 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:   7482 / TIMESTEP:  1838086 / DURATION:   304 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1092 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:   7483 / TIMESTEP:  1838308 / DURATION:   222 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.8650 / AVG_LOSS: 0.00246 / MODE: exploit
EPISODE:   7484 / TIMESTEP:  1838558 / DURATION:   250 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0421 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:   7485 / TIMESTEP:  1838832 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1109 / AVG_LOSS: 0.00317 / MODE: exploit
EPISODE:   7486 / TIMESTEP:  1839074 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9626 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   7487 / TIMESTEP:  1839240 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7536 / TIMESTEP:  1851377 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9553 / AVG_LOSS: 0.00336 / MODE: exploit
EPISODE:   7537 / TIMESTEP:  1851557 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8560 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:   7538 / TIMESTEP:  1851730 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7837 / AVG_LOSS: 0.00401 / MODE: exploit
EPISODE:   7539 / TIMESTEP:  1851901 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8651 / AVG_LOSS: 0.00295 / MODE: exploit
EPISODE:   7540 / TIMESTEP:  1852127 / DURATION:   226 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9992 / AVG_LOSS: 0.00352 / MODE: exploit
EPISODE:   7541 / TIMESTEP:  1852362 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1426 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:   7542 / TIMESTEP:  1852637 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   7591 / TIMESTEP:  1864661 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9952 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   7592 / TIMESTEP:  1864959 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3090 / AVG_LOSS: 0.00289 / MODE: exploit
EPISODE:   7593 / TIMESTEP:  1865145 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8512 / AVG_LOSS: 0.00339 / MODE: exploit
EPISODE:   7594 / TIMESTEP:  1865379 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9192 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   7595 / TIMESTEP:  1865566 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8487 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:   7596 / TIMESTEP:  1865738 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8357 / AVG_LOSS: 0.00358 / MODE: exploit
EPISODE:   7597 / TIMESTEP:  1865908 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7646 / TIMESTEP:  1877980 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1710 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   7647 / TIMESTEP:  1878155 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7873 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   7648 / TIMESTEP:  1878453 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 0.9649 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   7649 / TIMESTEP:  1878690 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1027 / AVG_LOSS: 0.00375 / MODE: exploit
EPISODE:   7650 / TIMESTEP:  1878892 / DURATION:   202 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0432 / AVG_LOSS: 0.00360 / MODE: exploit
EPISODE:   7651 / TIMESTEP:  1879062 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8335 / AVG_LOSS: 0.00332 / MODE: exploit
EPISODE:   7652 / TIMESTEP:  1879350 / DURATION:   288 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   7701 / TIMESTEP:  1891242 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8050 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   7702 / TIMESTEP:  1891409 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8510 / AVG_LOSS: 0.00397 / MODE: exploit
EPISODE:   7703 / TIMESTEP:  1891678 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0458 / AVG_LOSS: 0.00369 / MODE: exploit
EPISODE:   7704 / TIMESTEP:  1891864 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9254 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:   7705 / TIMESTEP:  1892039 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8069 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   7706 / TIMESTEP:  1892225 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.7983 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   7707 / TIMESTEP:  1892462 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   7756 / TIMESTEP:  1903936 / DURATION:   159 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8482 / AVG_LOSS: 0.00341 / MODE: exploit
EPISODE:   7757 / TIMESTEP:  1904277 / DURATION:   341 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0254 / AVG_LOSS: 0.00302 / MODE: exploit
EPISODE:   7758 / TIMESTEP:  1904445 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8056 / AVG_LOSS: 0.00364 / MODE: exploit
EPISODE:   7759 / TIMESTEP:  1904684 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9747 / AVG_LOSS: 0.00350 / MODE: exploit
EPISODE:   7760 / TIMESTEP:  1905030 / DURATION:   346 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2301 / AVG_LOSS: 0.00336 / MODE: exploit
EPISODE:   7761 / TIMESTEP:  1905222 / DURATION:   192 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8530 / AVG_LOSS: 0.00347 / MODE: exploit
EPISODE:   7762 / TIMESTEP:  1905528 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   7811 / TIMESTEP:  1917821 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2625 / AVG_LOSS: 0.00334 / MODE: exploit
EPISODE:   7812 / TIMESTEP:  1918106 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1174 / AVG_LOSS: 0.00324 / MODE: exploit
EPISODE:   7813 / TIMESTEP:  1918335 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9527 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:   7814 / TIMESTEP:  1918618 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1191 / AVG_LOSS: 0.00399 / MODE: exploit
EPISODE:   7815 / TIMESTEP:  1918895 / DURATION:   277 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1287 / AVG_LOSS: 0.00412 / MODE: exploit
EPISODE:   7816 / TIMESTEP:  1919123 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9991 / AVG_LOSS: 0.00330 / MODE: exploit
EPISODE:   7817 / TIMESTEP:  1919428 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   7866 / TIMESTEP:  1930713 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2133 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   7867 / TIMESTEP:  1930944 / DURATION:   231 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9282 / AVG_LOSS: 0.00418 / MODE: exploit
EPISODE:   7868 / TIMESTEP:  1931311 / DURATION:   367 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0990 / AVG_LOSS: 0.00360 / MODE: exploit
EPISODE:   7869 / TIMESTEP:  1931549 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1409 / AVG_LOSS: 0.00354 / MODE: exploit
EPISODE:   7870 / TIMESTEP:  1931975 / DURATION:   426 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.2199 / AVG_LOSS: 0.00404 / MODE: exploit
EPISODE:   7871 / TIMESTEP:  1932337 / DURATION:   362 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0872 / AVG_LOSS: 0.00368 / MODE: exploit
EPISODE:   7872 / TIMESTEP:  1932512 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7921 / TIMESTEP:  1944261 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9698 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   7922 / TIMESTEP:  1944501 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9967 / AVG_LOSS: 0.00319 / MODE: exploit
EPISODE:   7923 / TIMESTEP:  1944742 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9452 / AVG_LOSS: 0.00352 / MODE: exploit
EPISODE:   7924 / TIMESTEP:  1944981 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0756 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:   7925 / TIMESTEP:  1945220 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9522 / AVG_LOSS: 0.00368 / MODE: exploit
EPISODE:   7926 / TIMESTEP:  1945463 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9742 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   7927 / TIMESTEP:  1945631 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   7976 / TIMESTEP:  1959451 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8633 / AVG_LOSS: 0.00293 / MODE: exploit
EPISODE:   7977 / TIMESTEP:  1959607 / DURATION:   156 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9361 / AVG_LOSS: 0.00368 / MODE: exploit
EPISODE:   7978 / TIMESTEP:  1959915 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1876 / AVG_LOSS: 0.00381 / MODE: exploit
EPISODE:   7979 / TIMESTEP:  1960379 / DURATION:   464 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.3590 / AVG_LOSS: 0.00402 / MODE: exploit
EPISODE:   7980 / TIMESTEP:  1960584 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0004 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   7981 / TIMESTEP:  1960821 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1971 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:   7982 / TIMESTEP:  1961106 / DURATION:   285 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   8031 / TIMESTEP:  1972642 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1771 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:   8032 / TIMESTEP:  1972816 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8924 / AVG_LOSS: 0.00335 / MODE: exploit
EPISODE:   8033 / TIMESTEP:  1972982 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8848 / AVG_LOSS: 0.00420 / MODE: exploit
EPISODE:   8034 / TIMESTEP:  1973308 / DURATION:   326 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1802 / AVG_LOSS: 0.00390 / MODE: exploit
EPISODE:   8035 / TIMESTEP:  1973484 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9159 / AVG_LOSS: 0.00315 / MODE: exploit
EPISODE:   8036 / TIMESTEP:  1973759 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0547 / AVG_LOSS: 0.00324 / MODE: exploit
EPISODE:   8037 / TIMESTEP:  1973932 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8086 / TIMESTEP:  1985606 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1970 / AVG_LOSS: 0.00349 / MODE: exploit
EPISODE:   8087 / TIMESTEP:  1985909 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1593 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   8088 / TIMESTEP:  1986111 / DURATION:   202 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9548 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   8089 / TIMESTEP:  1986515 / DURATION:   404 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.3295 / AVG_LOSS: 0.00382 / MODE: exploit
EPISODE:   8090 / TIMESTEP:  1986858 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.0623 / AVG_LOSS: 0.00308 / MODE: exploit
EPISODE:   8091 / TIMESTEP:  1987095 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2142 / AVG_LOSS: 0.00361 / MODE: exploit
EPISODE:   8092 / TIMESTEP:  1987305 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   8141 / TIMESTEP:  1999703 / DURATION:   371 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.2341 / AVG_LOSS: 0.00338 / MODE: exploit
EPISODE:   8142 / TIMESTEP:  1999935 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1705 / AVG_LOSS: 0.00320 / MODE: exploit
EPISODE:   8143 / TIMESTEP:  2000190 / DURATION:   255 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9460 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:   8144 / TIMESTEP:  2000461 / DURATION:   271 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0826 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   8145 / TIMESTEP:  2000740 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2093 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:   8146 / TIMESTEP:  2001112 / DURATION:   372 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.2285 / AVG_LOSS: 0.00307 / MODE: exploit
EPISODE:   8147 / TIMESTEP:  2001294 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8196 / TIMESTEP:  2013840 / DURATION:   410 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.2381 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:   8197 / TIMESTEP:  2014334 / DURATION:   494 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_MAX_Q: 0.9837 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:   8198 / TIMESTEP:  2014616 / DURATION:   282 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0558 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   8199 / TIMESTEP:  2014872 / DURATION:   256 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1865 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   8200 / TIMESTEP:  2015039 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9610 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:   8201 / TIMESTEP:  2015216 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.8891 / AVG_LOSS: 0.00390 / MODE: exploit
EPISODE:   8202 / TIMESTEP:  2015428 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   8251 / TIMESTEP:  2027685 / DURATION:   349 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2292 / AVG_LOSS: 0.00361 / MODE: exploit
EPISODE:   8252 / TIMESTEP:  2027959 / DURATION:   274 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0869 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   8253 / TIMESTEP:  2028189 / DURATION:   230 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9884 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:   8254 / TIMESTEP:  2028453 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1498 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:   8255 / TIMESTEP:  2028620 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9824 / AVG_LOSS: 0.00356 / MODE: exploit
EPISODE:   8256 / TIMESTEP:  2028839 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0615 / AVG_LOSS: 0.00371 / MODE: exploit
EPISODE:   8257 / TIMESTEP:  2029169 / DURATION:   330 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   8306 / TIMESTEP:  2040540 / DURATION:   513 / EPSILON: 1.00000 / TOTAL_REWARD:   6 / AVG_MAX_Q: 1.3166 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:   8307 / TIMESTEP:  2040755 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1113 / AVG_LOSS: 0.00350 / MODE: exploit
EPISODE:   8308 / TIMESTEP:  2040933 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9189 / AVG_LOSS: 0.00339 / MODE: exploit
EPISODE:   8309 / TIMESTEP:  2041192 / DURATION:   259 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1518 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:   8310 / TIMESTEP:  2041490 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2421 / AVG_LOSS: 0.00323 / MODE: exploit
EPISODE:   8311 / TIMESTEP:  2041670 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9764 / AVG_LOSS: 0.00256 / MODE: exploit
EPISODE:   8312 / TIMESTEP:  2041853 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8361 / TIMESTEP:  2054280 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0035 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:   8362 / TIMESTEP:  2054519 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0874 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:   8363 / TIMESTEP:  2054695 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9578 / AVG_LOSS: 0.00346 / MODE: exploit
EPISODE:   8364 / TIMESTEP:  2055144 / DURATION:   449 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.1952 / AVG_LOSS: 0.00314 / MODE: exploit
EPISODE:   8365 / TIMESTEP:  2055458 / DURATION:   314 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1365 / AVG_LOSS: 0.00328 / MODE: exploit
EPISODE:   8366 / TIMESTEP:  2055883 / DURATION:   425 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.2395 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:   8367 / TIMESTEP:  2056119 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   8416 / TIMESTEP:  2068938 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1277 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:   8417 / TIMESTEP:  2069197 / DURATION:   259 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2235 / AVG_LOSS: 0.00327 / MODE: exploit
EPISODE:   8418 / TIMESTEP:  2069435 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2197 / AVG_LOSS: 0.00314 / MODE: exploit
EPISODE:   8419 / TIMESTEP:  2069758 / DURATION:   323 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2147 / AVG_LOSS: 0.00339 / MODE: exploit
EPISODE:   8420 / TIMESTEP:  2069925 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0173 / AVG_LOSS: 0.00262 / MODE: exploit
EPISODE:   8421 / TIMESTEP:  2070111 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9511 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   8422 / TIMESTEP:  2070551 / DURATION:   440 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

EPISODE:   8471 / TIMESTEP:  2082147 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0742 / AVG_LOSS: 0.00349 / MODE: exploit
EPISODE:   8472 / TIMESTEP:  2082407 / DURATION:   260 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.1120 / AVG_LOSS: 0.00312 / MODE: exploit
EPISODE:   8473 / TIMESTEP:  2082712 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.0875 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   8474 / TIMESTEP:  2082958 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0890 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   8475 / TIMESTEP:  2083145 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0079 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   8476 / TIMESTEP:  2083371 / DURATION:   226 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1362 / AVG_LOSS: 0.00252 / MODE: exploit
EPISODE:   8477 / TIMESTEP:  2083543 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8526 / TIMESTEP:  2095465 / DURATION:   337 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3178 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:   8527 / TIMESTEP:  2095694 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0626 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:   8528 / TIMESTEP:  2095906 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0261 / AVG_LOSS: 0.00358 / MODE: exploit
EPISODE:   8529 / TIMESTEP:  2096123 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0975 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   8530 / TIMESTEP:  2096351 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 0.9854 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:   8531 / TIMESTEP:  2096646 / DURATION:   295 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2698 / AVG_LOSS: 0.00347 / MODE: exploit
EPISODE:   8532 / TIMESTEP:  2096842 / DURATION:   196 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8581 / TIMESTEP:  2110208 / DURATION:   293 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2689 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:   8582 / TIMESTEP:  2110563 / DURATION:   355 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3231 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   8583 / TIMESTEP:  2110948 / DURATION:   385 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.3564 / AVG_LOSS: 0.00367 / MODE: exploit
EPISODE:   8584 / TIMESTEP:  2111224 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2921 / AVG_LOSS: 0.00363 / MODE: exploit
EPISODE:   8585 / TIMESTEP:  2111439 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0455 / AVG_LOSS: 0.00332 / MODE: exploit
EPISODE:   8586 / TIMESTEP:  2111663 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1274 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:   8587 / TIMESTEP:  2111986 / DURATION:   323 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   8636 / TIMESTEP:  2123461 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2408 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:   8637 / TIMESTEP:  2123681 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1298 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   8638 / TIMESTEP:  2123876 / DURATION:   195 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2180 / AVG_LOSS: 0.00363 / MODE: exploit
EPISODE:   8639 / TIMESTEP:  2124220 / DURATION:   344 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3767 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:   8640 / TIMESTEP:  2124438 / DURATION:   218 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1029 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:   8641 / TIMESTEP:  2124736 / DURATION:   298 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1441 / AVG_LOSS: 0.00323 / MODE: exploit
EPISODE:   8642 / TIMESTEP:  2125014 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   8691 / TIMESTEP:  2137025 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2682 / AVG_LOSS: 0.00331 / MODE: exploit
EPISODE:   8692 / TIMESTEP:  2137237 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1816 / AVG_LOSS: 0.00261 / MODE: exploit
EPISODE:   8693 / TIMESTEP:  2137410 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 0.9673 / AVG_LOSS: 0.00244 / MODE: exploit
EPISODE:   8694 / TIMESTEP:  2137806 / DURATION:   396 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.3175 / AVG_LOSS: 0.00318 / MODE: exploit
EPISODE:   8695 / TIMESTEP:  2138028 / DURATION:   222 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1144 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:   8696 / TIMESTEP:  2138331 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1458 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   8697 / TIMESTEP:  2138814 / DURATION:   483 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:   8746 / TIMESTEP:  2150523 / DURATION:   271 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3143 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:   8747 / TIMESTEP:  2150707 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0731 / AVG_LOSS: 0.00330 / MODE: exploit
EPISODE:   8748 / TIMESTEP:  2150920 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1096 / AVG_LOSS: 0.00410 / MODE: exploit
EPISODE:   8749 / TIMESTEP:  2151117 / DURATION:   197 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1409 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   8750 / TIMESTEP:  2151425 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3215 / AVG_LOSS: 0.00300 / MODE: exploit
EPISODE:   8751 / TIMESTEP:  2151669 / DURATION:   244 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0307 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   8752 / TIMESTEP:  2151853 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8801 / TIMESTEP:  2164204 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0135 / AVG_LOSS: 0.00353 / MODE: exploit
EPISODE:   8802 / TIMESTEP:  2164376 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1132 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   8803 / TIMESTEP:  2164656 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2142 / AVG_LOSS: 0.00356 / MODE: exploit
EPISODE:   8804 / TIMESTEP:  2164820 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0479 / AVG_LOSS: 0.00338 / MODE: exploit
EPISODE:   8805 / TIMESTEP:  2165154 / DURATION:   334 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2779 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   8806 / TIMESTEP:  2165456 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3716 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:   8807 / TIMESTEP:  2165734 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   8856 / TIMESTEP:  2177666 / DURATION:   245 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1429 / AVG_LOSS: 0.00334 / MODE: exploit
EPISODE:   8857 / TIMESTEP:  2177833 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1114 / AVG_LOSS: 0.00318 / MODE: exploit
EPISODE:   8858 / TIMESTEP:  2178062 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2317 / AVG_LOSS: 0.00336 / MODE: exploit
EPISODE:   8859 / TIMESTEP:  2178374 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2960 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:   8860 / TIMESTEP:  2178827 / DURATION:   453 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.4164 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:   8861 / TIMESTEP:  2179060 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2881 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   8862 / TIMESTEP:  2179332 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   8911 / TIMESTEP:  2191408 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3949 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   8912 / TIMESTEP:  2191675 / DURATION:   267 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2153 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   8913 / TIMESTEP:  2191976 / DURATION:   301 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4149 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   8914 / TIMESTEP:  2192358 / DURATION:   382 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2332 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:   8915 / TIMESTEP:  2192566 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1173 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   8916 / TIMESTEP:  2192740 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1122 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:   8917 / TIMESTEP:  2192925 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   8966 / TIMESTEP:  2204919 / DURATION:   363 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.1923 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   8967 / TIMESTEP:  2205238 / DURATION:   319 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3136 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:   8968 / TIMESTEP:  2205713 / DURATION:   475 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.4723 / AVG_LOSS: 0.00293 / MODE: exploit
EPISODE:   8969 / TIMESTEP:  2206014 / DURATION:   301 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3292 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:   8970 / TIMESTEP:  2206181 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1045 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   8971 / TIMESTEP:  2206360 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0739 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:   8972 / TIMESTEP:  2206642 / DURATION:   282 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   9021 / TIMESTEP:  2219350 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1440 / AVG_LOSS: 0.00227 / MODE: exploit
EPISODE:   9022 / TIMESTEP:  2219515 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0723 / AVG_LOSS: 0.00337 / MODE: exploit
EPISODE:   9023 / TIMESTEP:  2219756 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2072 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   9024 / TIMESTEP:  2220103 / DURATION:   347 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3892 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   9025 / TIMESTEP:  2220444 / DURATION:   341 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3162 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   9026 / TIMESTEP:  2220736 / DURATION:   292 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3039 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   9027 / TIMESTEP:  2221041 / DURATION:   305 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   9076 / TIMESTEP:  2232150 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0832 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   9077 / TIMESTEP:  2232461 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2459 / AVG_LOSS: 0.00312 / MODE: exploit
EPISODE:   9078 / TIMESTEP:  2232699 / DURATION:   238 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3065 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   9079 / TIMESTEP:  2232906 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.0940 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:   9080 / TIMESTEP:  2233308 / DURATION:   402 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.2976 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   9081 / TIMESTEP:  2233525 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1260 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   9082 / TIMESTEP:  2234013 / DURATION:   488 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:   9131 / TIMESTEP:  2246880 / DURATION:   332 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3091 / AVG_LOSS: 0.00273 / MODE: exploit
EPISODE:   9132 / TIMESTEP:  2247232 / DURATION:   352 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3693 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:   9133 / TIMESTEP:  2247410 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1013 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:   9134 / TIMESTEP:  2247582 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1244 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:   9135 / TIMESTEP:  2247769 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0702 / AVG_LOSS: 0.00323 / MODE: exploit
EPISODE:   9136 / TIMESTEP:  2248075 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3181 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:   9137 / TIMESTEP:  2248355 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   9186 / TIMESTEP:  2261106 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1939 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   9187 / TIMESTEP:  2261421 / DURATION:   315 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3239 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:   9188 / TIMESTEP:  2261714 / DURATION:   293 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3969 / AVG_LOSS: 0.00327 / MODE: exploit
EPISODE:   9189 / TIMESTEP:  2261939 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1686 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   9190 / TIMESTEP:  2262186 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2310 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:   9191 / TIMESTEP:  2262438 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2714 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:   9192 / TIMESTEP:  2262760 / DURATION:   322 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   9241 / TIMESTEP:  2275280 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3380 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:   9242 / TIMESTEP:  2275493 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2314 / AVG_LOSS: 0.00371 / MODE: exploit
EPISODE:   9243 / TIMESTEP:  2275709 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2158 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:   9244 / TIMESTEP:  2276015 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2675 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:   9245 / TIMESTEP:  2276316 / DURATION:   301 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4045 / AVG_LOSS: 0.00354 / MODE: exploit
EPISODE:   9246 / TIMESTEP:  2276548 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3370 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   9247 / TIMESTEP:  2276859 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   9296 / TIMESTEP:  2289567 / DURATION:   310 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3387 / AVG_LOSS: 0.00295 / MODE: exploit
EPISODE:   9297 / TIMESTEP:  2289808 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2281 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:   9298 / TIMESTEP:  2290078 / DURATION:   270 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1608 / AVG_LOSS: 0.00220 / MODE: exploit
EPISODE:   9299 / TIMESTEP:  2290331 / DURATION:   253 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3489 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:   9300 / TIMESTEP:  2290742 / DURATION:   411 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.2924 / AVG_LOSS: 0.00319 / MODE: exploit
EPISODE:   9301 / TIMESTEP:  2290910 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.0454 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:   9302 / TIMESTEP:  2291086 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   9351 / TIMESTEP:  2303027 / DURATION:   221 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1686 / AVG_LOSS: 0.00250 / MODE: exploit
EPISODE:   9352 / TIMESTEP:  2303286 / DURATION:   259 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3519 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:   9353 / TIMESTEP:  2303559 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2121 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:   9354 / TIMESTEP:  2303732 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1101 / AVG_LOSS: 0.00331 / MODE: exploit
EPISODE:   9355 / TIMESTEP:  2304011 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2846 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:   9356 / TIMESTEP:  2304239 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2525 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:   9357 / TIMESTEP:  2304428 / DURATION:   189 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   9406 / TIMESTEP:  2316450 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2167 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:   9407 / TIMESTEP:  2316631 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1848 / AVG_LOSS: 0.00356 / MODE: exploit
EPISODE:   9408 / TIMESTEP:  2316803 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1385 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:   9409 / TIMESTEP:  2317032 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2166 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:   9410 / TIMESTEP:  2317465 / DURATION:   433 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.3848 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:   9411 / TIMESTEP:  2317847 / DURATION:   382 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3599 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   9412 / TIMESTEP:  2318184 / DURATION:   337 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:   9461 / TIMESTEP:  2330553 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1333 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   9462 / TIMESTEP:  2330789 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2905 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:   9463 / TIMESTEP:  2330960 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1711 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:   9464 / TIMESTEP:  2331201 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3621 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   9465 / TIMESTEP:  2331375 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1763 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:   9466 / TIMESTEP:  2331592 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2677 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:   9467 / TIMESTEP:  2331764 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   9516 / TIMESTEP:  2344395 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3932 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:   9517 / TIMESTEP:  2344682 / DURATION:   287 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3273 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   9518 / TIMESTEP:  2344990 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2999 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:   9519 / TIMESTEP:  2345368 / DURATION:   378 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.4523 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:   9520 / TIMESTEP:  2345623 / DURATION:   255 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1102 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:   9521 / TIMESTEP:  2345968 / DURATION:   345 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2383 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:   9522 / TIMESTEP:  2346204 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   9571 / TIMESTEP:  2357958 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1106 / AVG_LOSS: 0.00263 / MODE: exploit
EPISODE:   9572 / TIMESTEP:  2358141 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1336 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:   9573 / TIMESTEP:  2358326 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1606 / AVG_LOSS: 0.00229 / MODE: exploit
EPISODE:   9574 / TIMESTEP:  2358554 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2373 / AVG_LOSS: 0.00242 / MODE: exploit
EPISODE:   9575 / TIMESTEP:  2358720 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1528 / AVG_LOSS: 0.00328 / MODE: exploit
EPISODE:   9576 / TIMESTEP:  2358995 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3126 / AVG_LOSS: 0.00255 / MODE: exploit
EPISODE:   9577 / TIMESTEP:  2359231 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   9626 / TIMESTEP:  2371906 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.4389 / AVG_LOSS: 0.00319 / MODE: exploit
EPISODE:   9627 / TIMESTEP:  2372146 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2381 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:   9628 / TIMESTEP:  2372381 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3597 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:   9629 / TIMESTEP:  2372629 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3213 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:   9630 / TIMESTEP:  2372813 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1074 / AVG_LOSS: 0.00376 / MODE: exploit
EPISODE:   9631 / TIMESTEP:  2373024 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3143 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:   9632 / TIMESTEP:  2373237 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   9681 / TIMESTEP:  2385502 / DURATION:   333 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2971 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:   9682 / TIMESTEP:  2385714 / DURATION:   212 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2621 / AVG_LOSS: 0.00211 / MODE: exploit
EPISODE:   9683 / TIMESTEP:  2386032 / DURATION:   318 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3146 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:   9684 / TIMESTEP:  2386305 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3612 / AVG_LOSS: 0.00237 / MODE: exploit
EPISODE:   9685 / TIMESTEP:  2386486 / DURATION:   181 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1242 / AVG_LOSS: 0.00256 / MODE: exploit
EPISODE:   9686 / TIMESTEP:  2386674 / DURATION:   188 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1230 / AVG_LOSS: 0.00339 / MODE: exploit
EPISODE:   9687 / TIMESTEP:  2386942 / DURATION:   268 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   9736 / TIMESTEP:  2398219 / DURATION:   198 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1188 / AVG_LOSS: 0.00237 / MODE: exploit
EPISODE:   9737 / TIMESTEP:  2398503 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2094 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:   9738 / TIMESTEP:  2398758 / DURATION:   255 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4093 / AVG_LOSS: 0.00249 / MODE: exploit
EPISODE:   9739 / TIMESTEP:  2398933 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1029 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:   9740 / TIMESTEP:  2399117 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1772 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:   9741 / TIMESTEP:  2399283 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1167 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:   9742 / TIMESTEP:  2399448 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:   9791 / TIMESTEP:  2411005 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4318 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:   9792 / TIMESTEP:  2411178 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2041 / AVG_LOSS: 0.00245 / MODE: exploit
EPISODE:   9793 / TIMESTEP:  2411361 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1210 / AVG_LOSS: 0.00241 / MODE: exploit
EPISODE:   9794 / TIMESTEP:  2411660 / DURATION:   299 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3272 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:   9795 / TIMESTEP:  2411901 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2637 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:   9796 / TIMESTEP:  2412109 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1978 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   9797 / TIMESTEP:  2412320 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   9846 / TIMESTEP:  2424717 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2594 / AVG_LOSS: 0.00273 / MODE: exploit
EPISODE:   9847 / TIMESTEP:  2424942 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1502 / AVG_LOSS: 0.00267 / MODE: exploit
EPISODE:   9848 / TIMESTEP:  2425117 / DURATION:   175 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1710 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:   9849 / TIMESTEP:  2425358 / DURATION:   241 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1300 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:   9850 / TIMESTEP:  2425634 / DURATION:   276 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4133 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:   9851 / TIMESTEP:  2425927 / DURATION:   293 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2869 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:   9852 / TIMESTEP:  2426207 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:   9901 / TIMESTEP:  2438141 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1452 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:   9902 / TIMESTEP:  2438338 / DURATION:   197 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1558 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:   9903 / TIMESTEP:  2438590 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2554 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:   9904 / TIMESTEP:  2438891 / DURATION:   301 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4417 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   9905 / TIMESTEP:  2439067 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1525 / AVG_LOSS: 0.00332 / MODE: exploit
EPISODE:   9906 / TIMESTEP:  2439260 / DURATION:   193 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1120 / AVG_LOSS: 0.00332 / MODE: exploit
EPISODE:   9907 / TIMESTEP:  2439502 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:   9956 / TIMESTEP:  2451467 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1833 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:   9957 / TIMESTEP:  2451745 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3382 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:   9958 / TIMESTEP:  2451979 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3618 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:   9959 / TIMESTEP:  2452155 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1431 / AVG_LOSS: 0.00312 / MODE: exploit
EPISODE:   9960 / TIMESTEP:  2452382 / DURATION:   227 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4085 / AVG_LOSS: 0.00355 / MODE: exploit
EPISODE:   9961 / TIMESTEP:  2452745 / DURATION:   363 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2644 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:   9962 / TIMESTEP:  2453154 / DURATION:   409 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:  10011 / TIMESTEP:  2464801 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1940 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  10012 / TIMESTEP:  2465114 / DURATION:   313 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5376 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:  10013 / TIMESTEP:  2465456 / DURATION:   342 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3589 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:  10014 / TIMESTEP:  2465627 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1939 / AVG_LOSS: 0.00256 / MODE: exploit
EPISODE:  10015 / TIMESTEP:  2465884 / DURATION:   257 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2655 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:  10016 / TIMESTEP:  2466218 / DURATION:   334 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.5491 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:  10017 / TIMESTEP:  2466370 / DURATION:   152 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  10066 / TIMESTEP:  2479038 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4159 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:  10067 / TIMESTEP:  2479205 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1666 / AVG_LOSS: 0.00317 / MODE: exploit
EPISODE:  10068 / TIMESTEP:  2479485 / DURATION:   280 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2669 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:  10069 / TIMESTEP:  2479816 / DURATION:   331 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.2594 / AVG_LOSS: 0.00300 / MODE: exploit
EPISODE:  10070 / TIMESTEP:  2479990 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1697 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:  10071 / TIMESTEP:  2480170 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1511 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:  10072 / TIMESTEP:  2480409 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  10121 / TIMESTEP:  2492322 / DURATION:   196 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2126 / AVG_LOSS: 0.00288 / MODE: exploit
EPISODE:  10122 / TIMESTEP:  2492704 / DURATION:   382 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.5474 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:  10123 / TIMESTEP:  2492953 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4097 / AVG_LOSS: 0.00251 / MODE: exploit
EPISODE:  10124 / TIMESTEP:  2493200 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2412 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:  10125 / TIMESTEP:  2493460 / DURATION:   260 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3037 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:  10126 / TIMESTEP:  2493647 / DURATION:   187 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1258 / AVG_LOSS: 0.00287 / MODE: exploit
EPISODE:  10127 / TIMESTEP:  2493963 / DURATION:   316 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:  10176 / TIMESTEP:  2506486 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3323 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:  10177 / TIMESTEP:  2506705 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2806 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:  10178 / TIMESTEP:  2507128 / DURATION:   423 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.4913 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:  10179 / TIMESTEP:  2507481 / DURATION:   353 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3686 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:  10180 / TIMESTEP:  2507783 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4277 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:  10181 / TIMESTEP:  2507962 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1448 / AVG_LOSS: 0.00240 / MODE: exploit
EPISODE:  10182 / TIMESTEP:  2508246 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  10231 / TIMESTEP:  2520859 / DURATION:   345 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.2741 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:  10232 / TIMESTEP:  2521031 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2197 / AVG_LOSS: 0.00261 / MODE: exploit
EPISODE:  10233 / TIMESTEP:  2521278 / DURATION:   247 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3738 / AVG_LOSS: 0.00222 / MODE: exploit
EPISODE:  10234 / TIMESTEP:  2521486 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2291 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:  10235 / TIMESTEP:  2521795 / DURATION:   309 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3964 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:  10236 / TIMESTEP:  2521980 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1089 / AVG_LOSS: 0.00263 / MODE: exploit
EPISODE:  10237 / TIMESTEP:  2522195 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  10286 / TIMESTEP:  2534736 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3799 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:  10287 / TIMESTEP:  2534969 / DURATION:   233 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3506 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:  10288 / TIMESTEP:  2535141 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1790 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:  10289 / TIMESTEP:  2535307 / DURATION:   166 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1361 / AVG_LOSS: 0.00317 / MODE: exploit
EPISODE:  10290 / TIMESTEP:  2535599 / DURATION:   292 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4101 / AVG_LOSS: 0.00262 / MODE: exploit
EPISODE:  10291 / TIMESTEP:  2535776 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1335 / AVG_LOSS: 0.00249 / MODE: exploit
EPISODE:  10292 / TIMESTEP:  2536142 / DURATION:   366 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:  10341 / TIMESTEP:  2548338 / DURATION:   277 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3322 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:  10342 / TIMESTEP:  2548645 / DURATION:   307 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4882 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  10343 / TIMESTEP:  2548817 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1401 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:  10344 / TIMESTEP:  2548989 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1473 / AVG_LOSS: 0.00250 / MODE: exploit
EPISODE:  10345 / TIMESTEP:  2549291 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4038 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:  10346 / TIMESTEP:  2549539 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3618 / AVG_LOSS: 0.00330 / MODE: exploit
EPISODE:  10347 / TIMESTEP:  2549795 / DURATION:   256 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  10396 / TIMESTEP:  2561337 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1590 / AVG_LOSS: 0.00245 / MODE: exploit
EPISODE:  10397 / TIMESTEP:  2561520 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1138 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:  10398 / TIMESTEP:  2561783 / DURATION:   263 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4676 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:  10399 / TIMESTEP:  2561960 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2697 / AVG_LOSS: 0.00230 / MODE: exploit
EPISODE:  10400 / TIMESTEP:  2562263 / DURATION:   303 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3493 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:  10401 / TIMESTEP:  2562512 / DURATION:   249 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3956 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:  10402 / TIMESTEP:  2562682 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  10451 / TIMESTEP:  2574286 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1880 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:  10452 / TIMESTEP:  2574514 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2551 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:  10453 / TIMESTEP:  2574690 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2395 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:  10454 / TIMESTEP:  2574969 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4620 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:  10455 / TIMESTEP:  2575180 / DURATION:   211 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2494 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:  10456 / TIMESTEP:  2575352 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2778 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:  10457 / TIMESTEP:  2575558 / DURATION:   206 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  10506 / TIMESTEP:  2587598 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2280 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:  10507 / TIMESTEP:  2587771 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2852 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:  10508 / TIMESTEP:  2588000 / DURATION:   229 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2787 / AVG_LOSS: 0.00334 / MODE: exploit
EPISODE:  10509 / TIMESTEP:  2588179 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1241 / AVG_LOSS: 0.00312 / MODE: exploit
EPISODE:  10510 / TIMESTEP:  2588526 / DURATION:   347 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3847 / AVG_LOSS: 0.00362 / MODE: exploit
EPISODE:  10511 / TIMESTEP:  2588889 / DURATION:   363 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.6367 / AVG_LOSS: 0.00315 / MODE: exploit
EPISODE:  10512 / TIMESTEP:  2589173 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  10561 / TIMESTEP:  2601346 / DURATION:   149 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1562 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:  10562 / TIMESTEP:  2601627 / DURATION:   281 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3813 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  10563 / TIMESTEP:  2601884 / DURATION:   257 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4020 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:  10564 / TIMESTEP:  2602148 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4919 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:  10565 / TIMESTEP:  2602444 / DURATION:   296 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5805 / AVG_LOSS: 0.00305 / MODE: exploit
EPISODE:  10566 / TIMESTEP:  2602609 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3035 / AVG_LOSS: 0.00324 / MODE: exploit
EPISODE:  10567 / TIMESTEP:  2602772 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  10616 / TIMESTEP:  2615736 / DURATION:   206 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3638 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:  10617 / TIMESTEP:  2615949 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3203 / AVG_LOSS: 0.00289 / MODE: exploit
EPISODE:  10618 / TIMESTEP:  2616116 / DURATION:   167 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1967 / AVG_LOSS: 0.00279 / MODE: exploit
EPISODE:  10619 / TIMESTEP:  2616388 / DURATION:   272 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4083 / AVG_LOSS: 0.00320 / MODE: exploit
EPISODE:  10620 / TIMESTEP:  2616643 / DURATION:   255 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3373 / AVG_LOSS: 0.00317 / MODE: exploit
EPISODE:  10621 / TIMESTEP:  2616815 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2427 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:  10622 / TIMESTEP:  2617126 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:  10671 / TIMESTEP:  2629373 / DURATION:   287 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3856 / AVG_LOSS: 0.00349 / MODE: exploit
EPISODE:  10672 / TIMESTEP:  2629608 / DURATION:   235 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3986 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:  10673 / TIMESTEP:  2629778 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2462 / AVG_LOSS: 0.00265 / MODE: exploit
EPISODE:  10674 / TIMESTEP:  2630089 / DURATION:   311 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.5044 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:  10675 / TIMESTEP:  2630251 / DURATION:   162 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2777 / AVG_LOSS: 0.00298 / MODE: exploit
EPISODE:  10676 / TIMESTEP:  2630491 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.1929 / AVG_LOSS: 0.00345 / MODE: exploit
EPISODE:  10677 / TIMESTEP:  2630766 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  10726 / TIMESTEP:  2642650 / DURATION:   228 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4841 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:  10727 / TIMESTEP:  2642890 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4795 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:  10728 / TIMESTEP:  2643061 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2413 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:  10729 / TIMESTEP:  2643238 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2612 / AVG_LOSS: 0.00328 / MODE: exploit
EPISODE:  10730 / TIMESTEP:  2643457 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2879 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  10731 / TIMESTEP:  2643820 / DURATION:   363 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.4568 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:  10732 / TIMESTEP:  2644093 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  10781 / TIMESTEP:  2655286 / DURATION:   153 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2041 / AVG_LOSS: 0.00278 / MODE: exploit
EPISODE:  10782 / TIMESTEP:  2655570 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3476 / AVG_LOSS: 0.00266 / MODE: exploit
EPISODE:  10783 / TIMESTEP:  2655725 / DURATION:   155 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2129 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:  10784 / TIMESTEP:  2655939 / DURATION:   214 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2964 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:  10785 / TIMESTEP:  2656103 / DURATION:   164 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2753 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:  10786 / TIMESTEP:  2656361 / DURATION:   258 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3964 / AVG_LOSS: 0.00318 / MODE: exploit
EPISODE:  10787 / TIMESTEP:  2656625 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  10836 / TIMESTEP:  2669254 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2287 / AVG_LOSS: 0.00296 / MODE: exploit
EPISODE:  10837 / TIMESTEP:  2669570 / DURATION:   316 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.4314 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:  10838 / TIMESTEP:  2669777 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.2915 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:  10839 / TIMESTEP:  2669922 / DURATION:   145 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2959 / AVG_LOSS: 0.00355 / MODE: exploit
EPISODE:  10840 / TIMESTEP:  2670201 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3977 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:  10841 / TIMESTEP:  2670474 / DURATION:   273 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4054 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  10842 / TIMESTEP:  2670658 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  10891 / TIMESTEP:  2683172 / DURATION:   186 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3543 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:  10892 / TIMESTEP:  2683441 / DURATION:   269 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4707 / AVG_LOSS: 0.00274 / MODE: exploit
EPISODE:  10893 / TIMESTEP:  2683624 / DURATION:   183 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2244 / AVG_LOSS: 0.00387 / MODE: exploit
EPISODE:  10894 / TIMESTEP:  2683926 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.3643 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:  10895 / TIMESTEP:  2684243 / DURATION:   317 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.4071 / AVG_LOSS: 0.00342 / MODE: exploit
EPISODE:  10896 / TIMESTEP:  2684453 / DURATION:   210 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3247 / AVG_LOSS: 0.00335 / MODE: exploit
EPISODE:  10897 / TIMESTEP:  2684647 / DURATION:   194 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  10946 / TIMESTEP:  2696466 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3478 / AVG_LOSS: 0.00297 / MODE: exploit
EPISODE:  10947 / TIMESTEP:  2696644 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.1820 / AVG_LOSS: 0.00324 / MODE: exploit
EPISODE:  10948 / TIMESTEP:  2696820 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2545 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:  10949 / TIMESTEP:  2697126 / DURATION:   306 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.3616 / AVG_LOSS: 0.00336 / MODE: exploit
EPISODE:  10950 / TIMESTEP:  2697427 / DURATION:   301 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6099 / AVG_LOSS: 0.00363 / MODE: exploit
EPISODE:  10951 / TIMESTEP:  2697619 / DURATION:   192 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2807 / AVG_LOSS: 0.00385 / MODE: exploit
EPISODE:  10952 / TIMESTEP:  2698005 / DURATION:   386 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:  11001 / TIMESTEP:  2710414 / DURATION:   485 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_MAX_Q: 1.5869 / AVG_LOSS: 0.00357 / MODE: exploit
EPISODE:  11002 / TIMESTEP:  2710714 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6214 / AVG_LOSS: 0.00325 / MODE: exploit
EPISODE:  11003 / TIMESTEP:  2710960 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3805 / AVG_LOSS: 0.00316 / MODE: exploit
EPISODE:  11004 / TIMESTEP:  2711262 / DURATION:   302 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4459 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:  11005 / TIMESTEP:  2711575 / DURATION:   313 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.4627 / AVG_LOSS: 0.00323 / MODE: exploit
EPISODE:  11006 / TIMESTEP:  2711744 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2245 / AVG_LOSS: 0.00294 / MODE: exploit
EPISODE:  11007 / TIMESTEP:  2712034 / DURATION:   290 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  11056 / TIMESTEP:  2724279 / DURATION:   236 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4200 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:  11057 / TIMESTEP:  2724671 / DURATION:   392 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.7510 / AVG_LOSS: 0.00280 / MODE: exploit
EPISODE:  11058 / TIMESTEP:  2724842 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4483 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:  11059 / TIMESTEP:  2725007 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3736 / AVG_LOSS: 0.00254 / MODE: exploit
EPISODE:  11060 / TIMESTEP:  2725338 / DURATION:   331 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.5277 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:  11061 / TIMESTEP:  2725489 / DURATION:   151 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2954 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:  11062 / TIMESTEP:  2725965 / DURATION:   476 / EPSILON: 1.00000 / TOTAL_REWARD:   5 / AVG_

EPISODE:  11111 / TIMESTEP:  2738650 / DURATION:   182 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.2632 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:  11112 / TIMESTEP:  2738873 / DURATION:   223 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4714 / AVG_LOSS: 0.00302 / MODE: exploit
EPISODE:  11113 / TIMESTEP:  2739136 / DURATION:   263 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6353 / AVG_LOSS: 0.00332 / MODE: exploit
EPISODE:  11114 / TIMESTEP:  2739314 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3033 / AVG_LOSS: 0.00350 / MODE: exploit
EPISODE:  11115 / TIMESTEP:  2739656 / DURATION:   342 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.5495 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:  11116 / TIMESTEP:  2739825 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3173 / AVG_LOSS: 0.00283 / MODE: exploit
EPISODE:  11117 / TIMESTEP:  2740062 / DURATION:   237 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  11166 / TIMESTEP:  2752379 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5429 / AVG_LOSS: 0.00312 / MODE: exploit
EPISODE:  11167 / TIMESTEP:  2752619 / DURATION:   240 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5401 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:  11168 / TIMESTEP:  2753000 / DURATION:   381 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.4403 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:  11169 / TIMESTEP:  2753170 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3906 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:  11170 / TIMESTEP:  2753445 / DURATION:   275 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4580 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:  11171 / TIMESTEP:  2753876 / DURATION:   431 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.6502 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:  11172 / TIMESTEP:  2754168 / DURATION:   292 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_

EPISODE:  11221 / TIMESTEP:  2766812 / DURATION:   199 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3497 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:  11222 / TIMESTEP:  2766957 / DURATION:   145 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3805 / AVG_LOSS: 0.00249 / MODE: exploit
EPISODE:  11223 / TIMESTEP:  2767214 / DURATION:   257 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4663 / AVG_LOSS: 0.00293 / MODE: exploit
EPISODE:  11224 / TIMESTEP:  2767475 / DURATION:   261 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4850 / AVG_LOSS: 0.00336 / MODE: exploit
EPISODE:  11225 / TIMESTEP:  2767634 / DURATION:   159 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3755 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:  11226 / TIMESTEP:  2767887 / DURATION:   253 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4976 / AVG_LOSS: 0.00285 / MODE: exploit
EPISODE:  11227 / TIMESTEP:  2768027 / DURATION:   140 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11276 / TIMESTEP:  2781485 / DURATION:   163 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4154 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:  11277 / TIMESTEP:  2781659 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3447 / AVG_LOSS: 0.00320 / MODE: exploit
EPISODE:  11278 / TIMESTEP:  2781835 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3816 / AVG_LOSS: 0.00330 / MODE: exploit
EPISODE:  11279 / TIMESTEP:  2782007 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4606 / AVG_LOSS: 0.00327 / MODE: exploit
EPISODE:  11280 / TIMESTEP:  2782398 / DURATION:   391 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.5591 / AVG_LOSS: 0.00327 / MODE: exploit
EPISODE:  11281 / TIMESTEP:  2782619 / DURATION:   221 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5173 / AVG_LOSS: 0.00328 / MODE: exploit
EPISODE:  11282 / TIMESTEP:  2782934 / DURATION:   315 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_

EPISODE:  11331 / TIMESTEP:  2796323 / DURATION:   216 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4121 / AVG_LOSS: 0.00258 / MODE: exploit
EPISODE:  11332 / TIMESTEP:  2796536 / DURATION:   213 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4102 / AVG_LOSS: 0.00244 / MODE: exploit
EPISODE:  11333 / TIMESTEP:  2796714 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4491 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:  11334 / TIMESTEP:  2796882 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3519 / AVG_LOSS: 0.00333 / MODE: exploit
EPISODE:  11335 / TIMESTEP:  2797125 / DURATION:   243 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5853 / AVG_LOSS: 0.00314 / MODE: exploit
EPISODE:  11336 / TIMESTEP:  2797364 / DURATION:   239 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5733 / AVG_LOSS: 0.00372 / MODE: exploit
EPISODE:  11337 / TIMESTEP:  2797582 / DURATION:   218 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  11386 / TIMESTEP:  2809393 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3168 / AVG_LOSS: 0.00299 / MODE: exploit
EPISODE:  11387 / TIMESTEP:  2809613 / DURATION:   220 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4806 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:  11388 / TIMESTEP:  2809952 / DURATION:   339 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.6218 / AVG_LOSS: 0.00340 / MODE: exploit
EPISODE:  11389 / TIMESTEP:  2810159 / DURATION:   207 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5453 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  11390 / TIMESTEP:  2810331 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4467 / AVG_LOSS: 0.00306 / MODE: exploit
EPISODE:  11391 / TIMESTEP:  2810639 / DURATION:   308 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5634 / AVG_LOSS: 0.00259 / MODE: exploit
EPISODE:  11392 / TIMESTEP:  2810854 / DURATION:   215 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  11441 / TIMESTEP:  2822324 / DURATION:   258 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.6188 / AVG_LOSS: 0.00349 / MODE: exploit
EPISODE:  11442 / TIMESTEP:  2822702 / DURATION:   378 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.7105 / AVG_LOSS: 0.00290 / MODE: exploit
EPISODE:  11443 / TIMESTEP:  2822887 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3919 / AVG_LOSS: 0.00370 / MODE: exploit
EPISODE:  11444 / TIMESTEP:  2823106 / DURATION:   219 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4714 / AVG_LOSS: 0.00343 / MODE: exploit
EPISODE:  11445 / TIMESTEP:  2823330 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5317 / AVG_LOSS: 0.00327 / MODE: exploit
EPISODE:  11446 / TIMESTEP:  2823614 / DURATION:   284 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5038 / AVG_LOSS: 0.00276 / MODE: exploit
EPISODE:  11447 / TIMESTEP:  2823788 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11496 / TIMESTEP:  2835486 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.6106 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:  11497 / TIMESTEP:  2835691 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4717 / AVG_LOSS: 0.00284 / MODE: exploit
EPISODE:  11498 / TIMESTEP:  2835909 / DURATION:   218 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.3501 / AVG_LOSS: 0.00268 / MODE: exploit
EPISODE:  11499 / TIMESTEP:  2836200 / DURATION:   291 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6645 / AVG_LOSS: 0.00270 / MODE: exploit
EPISODE:  11500 / TIMESTEP:  2836384 / DURATION:   184 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4761 / AVG_LOSS: 0.00322 / MODE: exploit
EPISODE:  11501 / TIMESTEP:  2836569 / DURATION:   185 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3607 / AVG_LOSS: 0.00348 / MODE: exploit
EPISODE:  11502 / TIMESTEP:  2836739 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11551 / TIMESTEP:  2848693 / DURATION:   161 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5220 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:  11552 / TIMESTEP:  2848863 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5354 / AVG_LOSS: 0.00328 / MODE: exploit
EPISODE:  11553 / TIMESTEP:  2849114 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6585 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:  11554 / TIMESTEP:  2849378 / DURATION:   264 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6680 / AVG_LOSS: 0.00247 / MODE: exploit
EPISODE:  11555 / TIMESTEP:  2849678 / DURATION:   300 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4422 / AVG_LOSS: 0.00329 / MODE: exploit
EPISODE:  11556 / TIMESTEP:  2849922 / DURATION:   244 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5196 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:  11557 / TIMESTEP:  2850090 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11606 / TIMESTEP:  2861699 / DURATION:   363 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.7264 / AVG_LOSS: 0.00303 / MODE: exploit
EPISODE:  11607 / TIMESTEP:  2861923 / DURATION:   224 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5289 / AVG_LOSS: 0.00314 / MODE: exploit
EPISODE:  11608 / TIMESTEP:  2862148 / DURATION:   225 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.7036 / AVG_LOSS: 0.00365 / MODE: exploit
EPISODE:  11609 / TIMESTEP:  2862396 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5927 / AVG_LOSS: 0.00311 / MODE: exploit
EPISODE:  11610 / TIMESTEP:  2862679 / DURATION:   283 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6168 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:  11611 / TIMESTEP:  2862911 / DURATION:   232 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.4273 / AVG_LOSS: 0.00345 / MODE: exploit
EPISODE:  11612 / TIMESTEP:  2863088 / DURATION:   177 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11661 / TIMESTEP:  2874790 / DURATION:   251 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5692 / AVG_LOSS: 0.00291 / MODE: exploit
EPISODE:  11662 / TIMESTEP:  2874970 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.3653 / AVG_LOSS: 0.00351 / MODE: exploit
EPISODE:  11663 / TIMESTEP:  2875222 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5484 / AVG_LOSS: 0.00277 / MODE: exploit
EPISODE:  11664 / TIMESTEP:  2875428 / DURATION:   206 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5516 / AVG_LOSS: 0.00264 / MODE: exploit
EPISODE:  11665 / TIMESTEP:  2875706 / DURATION:   278 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6025 / AVG_LOSS: 0.00370 / MODE: exploit
EPISODE:  11666 / TIMESTEP:  2875895 / DURATION:   189 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.6040 / AVG_LOSS: 0.00308 / MODE: exploit
EPISODE:  11667 / TIMESTEP:  2876060 / DURATION:   165 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11716 / TIMESTEP:  2888435 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.6027 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:  11717 / TIMESTEP:  2888658 / DURATION:   223 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5881 / AVG_LOSS: 0.00310 / MODE: exploit
EPISODE:  11718 / TIMESTEP:  2888834 / DURATION:   176 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4690 / AVG_LOSS: 0.00263 / MODE: exploit
EPISODE:  11719 / TIMESTEP:  2889039 / DURATION:   205 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.6321 / AVG_LOSS: 0.00394 / MODE: exploit
EPISODE:  11720 / TIMESTEP:  2889213 / DURATION:   174 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5207 / AVG_LOSS: 0.00307 / MODE: exploit
EPISODE:  11721 / TIMESTEP:  2889492 / DURATION:   279 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.6294 / AVG_LOSS: 0.00353 / MODE: exploit
EPISODE:  11722 / TIMESTEP:  2889719 / DURATION:   227 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  11771 / TIMESTEP:  2901691 / DURATION:   250 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5629 / AVG_LOSS: 0.00356 / MODE: exploit
EPISODE:  11772 / TIMESTEP:  2901899 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5639 / AVG_LOSS: 0.00330 / MODE: exploit
EPISODE:  11773 / TIMESTEP:  2902068 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5412 / AVG_LOSS: 0.00339 / MODE: exploit
EPISODE:  11774 / TIMESTEP:  2902238 / DURATION:   170 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5601 / AVG_LOSS: 0.00315 / MODE: exploit
EPISODE:  11775 / TIMESTEP:  2902490 / DURATION:   252 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.5046 / AVG_LOSS: 0.00378 / MODE: exploit
EPISODE:  11776 / TIMESTEP:  2902668 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5656 / AVG_LOSS: 0.00301 / MODE: exploit
EPISODE:  11777 / TIMESTEP:  2902862 / DURATION:   194 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11826 / TIMESTEP:  2914385 / DURATION:   218 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5990 / AVG_LOSS: 0.00281 / MODE: exploit
EPISODE:  11827 / TIMESTEP:  2914809 / DURATION:   424 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_MAX_Q: 1.5710 / AVG_LOSS: 0.00321 / MODE: exploit
EPISODE:  11828 / TIMESTEP:  2915057 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5803 / AVG_LOSS: 0.00334 / MODE: exploit
EPISODE:  11829 / TIMESTEP:  2915350 / DURATION:   293 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.7071 / AVG_LOSS: 0.00331 / MODE: exploit
EPISODE:  11830 / TIMESTEP:  2915519 / DURATION:   169 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5390 / AVG_LOSS: 0.00255 / MODE: exploit
EPISODE:  11831 / TIMESTEP:  2915699 / DURATION:   180 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.4313 / AVG_LOSS: 0.00269 / MODE: exploit
EPISODE:  11832 / TIMESTEP:  2915933 / DURATION:   234 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  11881 / TIMESTEP:  2927565 / DURATION:   168 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5903 / AVG_LOSS: 0.00282 / MODE: exploit
EPISODE:  11882 / TIMESTEP:  2927736 / DURATION:   171 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5092 / AVG_LOSS: 0.00272 / MODE: exploit
EPISODE:  11883 / TIMESTEP:  2927908 / DURATION:   172 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5001 / AVG_LOSS: 0.00236 / MODE: exploit
EPISODE:  11884 / TIMESTEP:  2928154 / DURATION:   246 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.6776 / AVG_LOSS: 0.00257 / MODE: exploit
EPISODE:  11885 / TIMESTEP:  2928494 / DURATION:   340 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.6635 / AVG_LOSS: 0.00304 / MODE: exploit
EPISODE:  11886 / TIMESTEP:  2928667 / DURATION:   173 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5945 / AVG_LOSS: 0.00246 / MODE: exploit
EPISODE:  11887 / TIMESTEP:  2928915 / DURATION:   248 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_

EPISODE:  11936 / TIMESTEP:  2940271 / DURATION:   320 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.6440 / AVG_LOSS: 0.00286 / MODE: exploit
EPISODE:  11937 / TIMESTEP:  2940583 / DURATION:   312 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.7502 / AVG_LOSS: 0.00260 / MODE: exploit
EPISODE:  11938 / TIMESTEP:  2940838 / DURATION:   255 / EPSILON: 1.00000 / TOTAL_REWARD:   2 / AVG_MAX_Q: 1.4311 / AVG_LOSS: 0.00353 / MODE: exploit
EPISODE:  11939 / TIMESTEP:  2941055 / DURATION:   217 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5272 / AVG_LOSS: 0.00289 / MODE: exploit
EPISODE:  11940 / TIMESTEP:  2941375 / DURATION:   320 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.7272 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:  11941 / TIMESTEP:  2941617 / DURATION:   242 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5064 / AVG_LOSS: 0.00275 / MODE: exploit
EPISODE:  11942 / TIMESTEP:  2941795 / DURATION:   178 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_

EPISODE:  11991 / TIMESTEP:  2953150 / DURATION:   179 / EPSILON: 1.00000 / TOTAL_REWARD:   0 / AVG_MAX_Q: 1.5495 / AVG_LOSS: 0.00221 / MODE: exploit
EPISODE:  11992 / TIMESTEP:  2953372 / DURATION:   222 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.5501 / AVG_LOSS: 0.00313 / MODE: exploit
EPISODE:  11993 / TIMESTEP:  2953733 / DURATION:   361 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.5402 / AVG_LOSS: 0.00292 / MODE: exploit
EPISODE:  11994 / TIMESTEP:  2954089 / DURATION:   356 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.5063 / AVG_LOSS: 0.00309 / MODE: exploit
EPISODE:  11995 / TIMESTEP:  2954432 / DURATION:   343 / EPSILON: 1.00000 / TOTAL_REWARD:   3 / AVG_MAX_Q: 1.7050 / AVG_LOSS: 0.00326 / MODE: exploit
EPISODE:  11996 / TIMESTEP:  2954640 / DURATION:   208 / EPSILON: 1.00000 / TOTAL_REWARD:   1 / AVG_MAX_Q: 1.6202 / AVG_LOSS: 0.00271 / MODE: exploit
EPISODE:  11997 / TIMESTEP:  2955020 / DURATION:   380 / EPSILON: 1.00000 / TOTAL_REWARD:   4 / AVG_

In [10]:
MAX_FRAMES_TEST = 300
frames = list()

observation = env.reset()

last_observation = observation
observation, _, _, _ = env.step(1) 
state = get_initial_state(observation, last_observation)
for _ in range(MAX_FRAMES_TEST):
    last_observation = observation

    action = np.argmax(q_values.eval(feed_dict={s: [np.float32(state / 255.0)]}))
    observation, _, terminal, _ = env.step(action)
    
    frames.append(observation)

   # env.render()
    processed_observation = preprocess(observation, last_observation)
    state = np.dstack((state[:, :, 1:], processed_observation))
    #state = np.append(state[1:, :, :], processed_observation, axis=0)

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [12]:
from matplotlib import animation
from JSAnimation.IPython_display import display_animation
import matplotlib.pyplot as plt
def display_frames_as_gif(frames, filename_gif = None):
    """
    Displays a list of frames as a gif, with controls
    """
    plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi = 72)
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        patch.set_data(frames[i])

    anim = animation.FuncAnimation(plt.gcf(), animate, frames = len(frames), interval=50)
    if filename_gif: 
        anim.save(filename_gif, writer = 'imagemagick', fps=20)
    display(display_animation(anim, default_mode='loop'))
display_frames_as_gif(frames[:300])

In [None]:

obs, _, _, _ = env.step(1)
%matplotlib inline
plt.imshow(obs)