In [1]:
import gym
import numpy as np
import math
import random

In [3]:
env = gym.make('CartPole-v0')

In [4]:
env.action_space.n

2

In [5]:
env.observation_space

Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)

In [40]:
NUM_BUCKETS = (1, 1, 6, 3)

In [7]:
NUM_ACTIONS = env.action_space.n

In [8]:
STATE_BOUNDS = list(zip(env.observation_space.low, env.observation_space.high))

In [9]:
STATE_BOUNDS

[(-4.8, 4.8),
 (-3.4028235e+38, 3.4028235e+38),
 (-0.41887903, 0.41887903),
 (-3.4028235e+38, 3.4028235e+38)]

In [10]:
STATE_BOUNDS[1] = [-0.5, 0.5]

In [11]:
STATE_BOUNDS[3] = [-math.radians(50), math.radians(50)]

In [12]:
STATE_BOUNDS

[(-4.8, 4.8),
 [-0.5, 0.5],
 (-0.41887903, 0.41887903),
 [-0.8726646259971648, 0.8726646259971648]]

In [42]:
q_table = np.zeros(NUM_BUCKETS + (NUM_ACTIONS,))

In [43]:
q_table

array([[[[[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.]]]]])

In [46]:
q_table.shape

(1, 1, 6, 3, 2)

In [23]:
EXPLORE_MIN_RATE = 0.01

In [20]:
LEARNING_MIN_RATE = 0.1 # alpha

In [19]:
def get_explore_rate(t):
    return max(EXPLORE_MIN_RATE, min(1, 1.0 - math.log10((t+1)/25)))

In [21]:
def get_learning_rate(t):
    return max(LEARNING_MIN_RATE, min(1, 1.0 - math.log10((t+1)/25)))

In [29]:
def select_action(state, explore_rate):
    if random.random() < explore_rate:
        action = env.action_space.sample()
    else:
        action = np.maxarg(q_table[state])
    return action

In [54]:
def state_of_bucket(state):
    bucket_indices = []
    for i in range(len(state)):
        if state[i] <= STATE_BOUNDS[i][0]:
            bucket_index = 0
        elif state[i] >= STATE_BOUNDS[i][1]:
            bucket_index = NUM_BUCKETS[i]-1
        else:
            bound_width = STATE_BOUNDS[i][1] - STATE_BOUNDS[i][0]
            offset = (NUM_BUCKETS[i]-1) * STATE_BOUNDS[i][0] / bound_width
            scalling = (NUM_BUCKETS[i]-1) / bound_width
            bucket_index = int(round(state[i]*scalling - offset))
        bucket_indices.append(bucket_index)
    return tuple(bucket_indices)

In [59]:
def simulate():
    learning_rate = get_learning_rate(0)
    explore_rate = get_explore_rate(0)
    discount_factor = 0.99 # gamma
    num_streaks = 0  # number of 200 times the cart has balanced pole
    for episode in range(1000):
        observ = env.reset()
        state_0 = state_of_bucket(observ)
        for t in range(250):  # 250 timesteps
            env.render()
            action = select_action(state_0, explore_rate)
            observ, reward, done, step = env.step(action)
            state = state_of_bucket(observ)
            print(state)
            best_q = np.amax(q_table[state]) # q learning, next best state
            q_table[state_0 + (action,)] = learning_rate * (reward + discount_factor * best_q - q_table[state_0 + (action,)])
            state_0 = state
            print(f'\nEpisode {episode}')
            print(f'time step {t}')
            print(f'Action {action}')
            print(f'State {state}')
            print(f'Reward {reward}')
            print(f'Best Q {best_q}')
            print(f'Explore rate {explore_rate}')
            print(f'Learning {learning_rate}')
            print(f'Streaks {num_streaks}')
            print('')
            
            if done:
                print(f'Episode {episode} is done after time steps {t}')
                if t >= 199:
                    num_streaks += 1
                else:
                    num_streaks = 0
                break
    

In [60]:
simulate()

(0, 0, 2, 1)

Episode 0
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.0
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 0
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 0
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.99
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 0
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 0
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.99
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 0
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.0
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 0
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.0
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 0
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)



(0, 0, 3, 2)

Episode 3
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9701
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 3
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 3.940399
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 3
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 3.930499
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 3
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 3.940399
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 3
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 3.940399
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 3
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9602989999999987
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 3
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9803970099999999
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 3
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.

(0, 0, 2, 1)

Episode 4
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901997011278898
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 4
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0001852255544494
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 4
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0000853703101837
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 4
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0000853703101837
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 4
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0000853703101837
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 4
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0000853703101837
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 4
time step 17
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990199000002
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 4
tim

(0, 0, 3, 1)

Episode 7
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.883248652348435
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 7
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8745091409339572
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 7
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9812549085906603
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 7
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902748460282382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 7
time step 6
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9991171889772956
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 7
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9995337935885695
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 7
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9995337935885695
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 7
time step 9

(0, 0, 2, 1)

Episode 9
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.9597357267750726
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 9
time step 17
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.9801009801980098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 9
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.9798973276637806
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 9
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900000878644977
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 9
time step 20
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900000878644977
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 9
time step 21
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9997030494758616
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 9
time step 22
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9997059311166052
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 9
tim

(0, 0, 3, 1)

Episode 12
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 3.871510020257218
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 12
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900970900347297
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 12
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 3.8426959202447843
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 12
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 3.814169931942683
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 12
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.000094069705814
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 12
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9612848997974277
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 12
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9612848997974277
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 12
t

(0, 0, 2, 1)

Episode 15
time step 21
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0098948646248278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 15
time step 22
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0098948646248278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 15
time step 23
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0194940999875826
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 15
time step 24
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0194940999875826
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 15
time step 25
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.979715678383688
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 15
time step 26
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.970113462599727
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 15
time step 27
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.979715678383688
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 1

(0, 0, 2, 0)

Episode 18
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.970199038523475
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 18
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.970101066164186
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 18
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.970101066164186
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 18
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.970101066164186
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 18
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.970199038523475
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 18
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.970199038523475
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 18
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.970005063529401
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 18
time step

(0, 0, 3, 2)

Episode 21
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0164892431800403
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 21
time step 25
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0443670635621847
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 21
time step 26
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0603179745182703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 21
time step 27
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0603179745182703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 21
time step 28
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0603179745182703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 21
time step 29
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.005347731210903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 21
time step 30
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.005347731210903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 

(0, 0, 3, 2)

Episode 24
time step 24
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990242197867
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 24
time step 25
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990097578022
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 24
time step 26
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990105492942
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 24
time step 27
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099010685999
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 24
time step 28
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990580212021
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 24
time step 29
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990580212021
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 24
time step 30
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990580212021
Explore rate 1
Learning 1
Streaks 0

Episode 24 is done af

(0, 0, 2, 1)

Episode 27
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990100911115007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 27
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0087279964943179
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 27
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990100911115007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 27
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99009900990197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 27
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99009900990197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 27
time step 16
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 27
time step 17
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.99999901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 27
time step

(0, 0, 2, 1)

Episode 30
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0106313407402876
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 30
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9913788189700776
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 30
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9913788189700776
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 30
time step 3
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9995409678262259
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 30
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9995409678262259
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 30
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.9608793080902478
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 30
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.9608793080902478
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 30
ti

(0, 0, 3, 2)

Episode 34
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0087657301043689
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 34
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0087657301043689
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 34
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9897237912911323
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 34
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0085789333818038
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 34
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0085789333818038
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 34
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9899142106792649
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 34
time step 14
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.990099009905802
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 3

(0, 0, 3, 2)

Episode 37
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902838089397634
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 37
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904686643779343
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 37
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902801687943916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 37
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902801687943916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 37
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904613084386431
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 37
time step 6
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902801687943916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 37
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900971983120561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 37
ti

(0, 0, 3, 0)

Episode 40
time step 11
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.99951584709766
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 40
time step 12
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9902948640308096
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 40
time step 13
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9903870738614782
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 40
time step 14
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9903870738614782
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 40
time step 15
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901883390920538
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 40
time step 16
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9994216870014365
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 40
time step 17
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9994216870014365
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 

(0, 0, 3, 2)

Episode 42
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9992421262675466
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 42
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9992421262675466
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 42
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9992421262675466
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 42
time step 12
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010383
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 42
time step 13
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010383
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 42
time step 14
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010383
Explore rate 1
Learning 1
Streaks 0

Episode 42 is done after time steps 14
(0, 0, 3, 1)

Episode 43
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.999333515790152
Explore rate 1
Learni

(0, 0, 3, 2)

Episode 47
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099046556
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 47
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099046005
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 47
time step 9
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 47
time step 10
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 47
time step 11
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

Episode 47 is done after time steps 11
(0, 0, 3, 1)

Episode 48
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901008276511252
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 48
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901922573857231
Explore rate 1
Learning

(0, 0, 4, 2)

Episode 50
time step 22
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 50
time step 23
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

Episode 50 is done after time steps 23
(0, 0, 3, 1)

Episode 51
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999145705456
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 51
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999145705907
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 51
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999145705907
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 51
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999145705907
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 51
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999145705456
Explore rate 1
Learning

(0, 0, 2, 0)

Episode 54
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901904394375369
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 54
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901904394375369
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 54
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901904300593425
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 54
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901904394375369
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 54
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900981709409591
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 54
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900981709409591
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 54
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990182905904
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 5

(0, 0, 4, 1)

Episode 55
time step 28
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 1.9410939703989993
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 55
time step 29
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 55
time step 30
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 55
time step 31
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 1.029203059303971
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 55
time step 32
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 1.029203059303971
Explore rate 1
Learning 1
Streaks 0

Episode 55 is done after time steps 32
(0, 0, 3, 1)

Episode 56
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990459759083
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 56
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9315839471720029
Explore rate 1
Learnin

(0, 0, 2, 1)

Episode 59
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901000150941595
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 59
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901896380499104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 59
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901878090174134
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 59
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901878090174134
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 59
time step 10
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901878090174134
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 59
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990184154321
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 59
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990184154321
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 59

(0, 0, 2, 0)

Episode 62
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.018413145214869
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 62
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0354960479311284
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 62
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0354960479311284
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 62
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9896450395206886
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 62
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990103549604793
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 62
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0067249730398442
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 62
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.006727942236948
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 62
time 

(0, 0, 3, 2)

Episode 64
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9909290760608125
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 64
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.028307061837822
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 64
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900981408028227
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 64
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900981408028227
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 64
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990185919718
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 64
time step 15
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0062153733534278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 64
time step 16
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0060542098058132
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode

(0, 0, 3, 2)

Episode 66
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9908682205094477
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9908682205094477
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9908682205094477
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9908530841088175
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9908532354728237
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9908530841088175
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900914691589118
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 66
t

(0, 0, 2, 1)

Episode 68
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0619616232739162
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 68
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9892474528200774
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 68
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989981220685
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 68
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989981220685
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 68
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990100187792
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 68
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0628046649006309
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 68
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0628046649006309
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 68
ti

(0, 0, 3, 2)

Episode 70
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9549713937751667
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 70
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904502860622482
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 70
time step 16
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900954971393774
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 70
time step 17
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990450286062
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 70
time step 18
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009549714
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 70
time step 19
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 70
time step 20
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode

(0, 0, 2, 0)

Episode 72
time step 25
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.96080595504941
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 72
time step 26
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098039311
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 72
time step 27
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098039311
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 72
time step 28
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099019704
Explore rate 1
Learning 1
Streaks 0

Episode 72 is done after time steps 28
(0, 0, 3, 1)

Episode 73
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8807606566889208
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 73
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900916536396192
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 73
time step 2
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900916536396192
Explore rate 1
Learning

(0, 0, 2, 1)

Episode 75
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901112376145134
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 75
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990069345268
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 75
time step 18
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990128903738652
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 75
time step 19
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990128903738652
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 75
time step 20
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9909801618811882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 75
time step 21
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9909801618811882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 75
time step 22
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9909414565237242
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 

(0, 0, 3, 2)

Episode 78
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098965656
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 78
time step 11
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990490050394
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 78
time step 12
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990490050394
Explore rate 1
Learning 1
Streaks 0

Episode 78 is done after time steps 12
(0, 0, 3, 1)

Episode 79
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0008088528707095
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 79
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.001566861233396
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 79
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.001566861233396
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 79
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9990812699073901
Explore rate 1
Learning 

(0, 0, 3, 2)

Episode 82
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900981453488567
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 82
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900981453488567
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 82
time step 20
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099934627815
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 82
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990006537218
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 82
time step 22
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 82
time step 23
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 82 is done after time steps 23
(0, 0, 2, 1)

Episode 83
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990186165179223
Explore rate 1
Learni

(0, 0, 2, 1)

Episode 85
time step 28
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.999507008939662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85
time step 29
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.999507008939662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85
time step 30
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.999507008939662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85
time step 31
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.999507008939662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85
time step 32
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990184426464
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85
time step 33
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901930983476166
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85
time step 34
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901930983476166
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 85

(0, 0, 3, 2)

Episode 87
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901099786848573
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 87
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901099786848573
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 87
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901098689970189
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 87
time step 13
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 1.9410939703989993
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 87
time step 14
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 1.9410939703989993
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 87
time step 15
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9805890602960097
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 87
time step 16
Action 0
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9901941093970399
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episod

(0, 0, 2, 2)

Episode 89
time step 27
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9902169957975344
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 89
time step 28
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9999883580280322
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 89
time step 29
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9999883580280322
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 89
time step 30
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9999883580280322
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 89
time step 31
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9997883426029457
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 89
time step 32
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9997903427571966
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 89
time step 33
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9997903427571966
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 3, 2)

Episode 91
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900881511920914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 91
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991184880792
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 91
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0082254240862245
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 91
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0082254240862245
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 91
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0082254240862245
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 91
time step 15
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990102920306
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 91
time step 16
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990102920306
Explore rate 1
Learning 1
Streaks 0

Episode 91 is done a

(0, 0, 3, 2)

Episode 94
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901901506245991
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 94
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901901506245991
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 94
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900983656870677
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 94
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990163431294
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 94
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900999276859441
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 94
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900999276859441
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 94
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900999120659553
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 94
ti

(0, 0, 3, 0)

Episode 97
time step 29
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.903213280961974
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 97
time step 30
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.903213280961974
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 97
time step 31
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.903213280961974
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 97
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9985812288577773
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 97
time step 33
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9985812288577773
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 97
time step 34
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900141877114224
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 97
time step 35
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9985009394139437
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 9

(0, 0, 3, 2)

Episode 99
time step 39
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901822374032496
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 99
time step 40
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901814052991398
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 99
time step 41
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900981859470086
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 99
time step 42
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990181405298
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 99
time step 43
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 99
time step 44
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 99
time step 45
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009896
Explore rate 1
Learning 1
Streaks 0

Episode 99 is done a

(0, 0, 2, 1)

Episode 100
time step 58
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9993148286781535
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 100
time step 59
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901007289870367
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 100
time step 60
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901007289870367
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 100
time step 61
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990181405298
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 100
time step 62
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990181405298
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 100
time step 63
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098185947
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 100
time step 64
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099018141
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 1)

Episode 102
time step 22
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9978494483685001
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 102
time step 23
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9978494483685001
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 102
time step 24
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990119005033
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 102
time step 25
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990119005033
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 102
time step 26
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099038351
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 102
time step 27
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099038351
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 102
time step 28
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009617
Explore rate 1
Learning 1
Streaks 0

Episode 102 i

(0, 0, 2, 1)

Episode 105
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9971747062233747
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 105
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900961287721177
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 105
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990387122788
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 105
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990096128772
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 105
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901697665789821
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 105
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901697665789821
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 105
time step 17
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990169059300315
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)



(0, 0, 3, 2)

Episode 108
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9968349450149905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 108
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9968347904797061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 108
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9968347904797061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 108
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9968347904797061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 108
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901134128369413
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 108
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900988658716305
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 108
time step 15
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990113412838
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)



(0, 0, 2, 0)

Episode 110
time step 30
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900996810592155
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 110
time step 31
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902251001169846
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 110
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902251001169846
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 110
time step 33
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990102854226
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 110
time step 34
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990102854226
Explore rate 1
Learning 1
Streaks 0

Episode 110 is done after time steps 34
(0, 0, 2, 1)

Episode 111
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900356938336197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 111
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901024865284906
Explore rate

(0, 0, 3, 1)

Episode 113
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901027195132146
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 113
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901027195132146
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 113
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901027195132146
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 113
time step 9
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901027195132146
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 113
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990100992066
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 113
time step 11
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.8608781821701825
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 113
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8521703904494726
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Epi

(0, 0, 2, 0)

Episode 115
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99009900966496
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 115
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.7929089792492203
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 115
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.784880880156725
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 115
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.784880880156725
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 115
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.784880880156725
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 115
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.784880880156725
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 115
time step 19
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.784880880156725
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episod

(0, 0, 3, 1)

Episode 119
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9991458464887857
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 119
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900085415351121
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 119
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990095992779
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 119
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990095992779
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 119
time step 16
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990394717668
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 119
time step 17
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0066633406181795
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 119
time step 18
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0066633406181795
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 0)

Episode 121
time step 23
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0310938788857213
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 121
time step 24
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0310938788857213
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 121
time step 25
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990103914621359
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 121
time step 26
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990103914621359
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 121
time step 27
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901019668951303
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 121
time step 28
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901019863723926
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 121
time step 29
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901019668951303
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

E

(0, 0, 1, 0)

Episode 124
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901046713360109
Explore rate 1
Learning 1
Streaks 0

Episode 124 is done after time steps 11
(0, 0, 2, 1)

Episode 125
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0064093954141249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 125
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902605788808783
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 125
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900973942111913
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 125
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900988430838147
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 125
time step 4
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900988430838147
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 125
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9903327211564554
Explore rate 1
L

(0, 0, 2, 1)

Episode 127
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989442907793
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 127
time step 18
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9903303823757292
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 127
time step 19
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903280679948798
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 127
time step 20
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903280679948798
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 127
time step 21
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0228027743789883
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 127
time step 22
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0224780273151473
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 127
time step 23
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0224780273151473
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 3, 1)

Episode 130
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9897954991145919
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 130
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.000901352133382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 130
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901022859027115
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 130
time step 4
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901022859027115
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 130
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9904072989169135
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 130
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9904072989169135
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 130
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901052137805519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode

(0, 0, 2, 1)

Episode 134
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903311518533355
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 134
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903311518533355
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 134
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9906371263013147
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 134
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9906371263013147
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 134
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990399603184966
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 134
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104641743
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 134
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104641743
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode

(0, 0, 2, 2)

Episode 135
time step 42
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990330382375
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 135
time step 43
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9903072579898222
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 135
time step 44
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9903072579898222
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 135
time step 45
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0012318862420873
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 135
time step 46
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0012318862420873
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 135
time step 47
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901036378725248
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 135
time step 48
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902149203562203
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 1)

Episode 138
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990669870362
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 138
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990669870362
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 138
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990669870362
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 138
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990669870362
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 138
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991484867516
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 138
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991476717544
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 138
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990077082854
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 3, 1)

Episode 141
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902080892400327
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 141
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991478722205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 141
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991478722205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 141
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991478722205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 141
time step 6
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991478722205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 141
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990101850848
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 141
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009898149
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode

(0, 0, 1, 1)

Episode 144
time step 19
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990099010194
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 144
time step 20
Action 1
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990099009896
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 144
time step 21
Action 1
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9901019651168463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 144
time step 22
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990099009896
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 144
time step 23
Action 1
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 144
time step 24
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990394531488
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 144
time step 25
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990394531488
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)


(0, 0, 2, 1)

Episode 148
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901021207591225
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 148
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990107432366
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 148
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990107432366
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 148
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098925677
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 148
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099031981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 148
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099115983
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 148
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099031981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episod

(0, 0, 4, 2)

Episode 149
time step 14
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 149 is done after time steps 14
(0, 0, 2, 1)

Episode 150
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990104444287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 150
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099009903787
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 150
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009621
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 150
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 150
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099064249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 150
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099064249
Explore rate 1
Le

(0, 0, 2, 1)

Episode 152
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902920891731544
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 152
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901009407949635
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 152
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.940991027213678
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 152
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.9314821263494912
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 152
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9711762778723185
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 152
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900970791082684
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 152
time step 18
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900970791082684
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)



(0, 0, 3, 2)

Episode 153
time step 41
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901022054484618
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 153
time step 42
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901021734868056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 153
time step 43
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990406972097
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 153
time step 44
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901021734868056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 153
time step 45
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901021734868056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 153
time step 46
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900989782651319
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 153
time step 47
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901021110547279
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)


(0, 0, 2, 0)

Episode 154
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.9479775176799294
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 154
time step 33
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.9479775176799294
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 154
time step 34
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9805202248232008
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 154
time step 35
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 1.9702969854426615
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 154
time step 36
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 1.9702969854426615
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 154
time step 37
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990109823642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 154
time step 38
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990109823642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 4, 2)

Episode 156
time step 30
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 2.0195059429232054
Explore rate 1
Learning 1
Streaks 0

Episode 156 is done after time steps 30
(0, 0, 3, 1)

Episode 157
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901028767617439
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 157
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0100826224385795
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 157
time step 2
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0100826224385795
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 157
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9994871568778565
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 157
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9994871568778565
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 157
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0087814414539555
Explore rate 1
L

(0, 0, 1, 0)

Episode 160
time step 8
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099118034
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 160
time step 9
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099118034
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 160
time step 10
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099116957
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 160
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099116969
Explore rate 1
Learning 1
Streaks 0

Episode 160 is done after time steps 11
(0, 0, 3, 1)

Episode 161
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0096724762946572
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 161
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9995931115334638
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 161
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9996939051810756
Explore rate 1


(0, 0, 2, 1)

Episode 164
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902803536554161
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 164
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902786200206486
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 164
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900972137997934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 164
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990278620021
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 164
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990281309857767
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 164
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990281309857767
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 164
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902803449086384
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Epi

(0, 0, 2, 0)

Episode 167
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991365375456
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 167
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990086346244
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 167
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991363691165
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 167
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991363708009
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 167
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991363691165
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 167
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991363691334
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 167
time step 8
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099116969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episod

(0, 0, 4, 2)

Episode 171
time step 8
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9899981953380523
Explore rate 1
Learning 1
Streaks 0

Episode 171 is done after time steps 8
(0, 0, 3, 1)

Episode 172
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9996871489090977
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 172
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901978284359734
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 172
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9996871489090977
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 172
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9996871489090977
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 172
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990003128510909
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 172
time step 5
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099968714891
Explore rate 1
Learn

(0, 0, 1, 0)

Episode 174
time step 8
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099116957
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 174
time step 9
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099116957
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 174
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099116957
Explore rate 1
Learning 1
Streaks 0

Episode 174 is done after time steps 10
(0, 0, 2, 1)

Episode 175
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 3.88164190979104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 175
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9611835809020901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 175
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0097071351732942
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 175
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.989902928648267
Explore rate 1
Lear

(0, 0, 3, 0)

Episode 177
time step 19
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0001464297325853
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 177
time step 20
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9899985357026742
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 177
time step 21
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99008483054425
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 177
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99008483054425
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 177
time step 23
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991516945574
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 177
time step 24
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9903691979932865
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 177
time step 25
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9903691979932865
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Epi

(0, 0, 3, 2)

Episode 179
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0180644128636736
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 179
time step 25
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0180644128636736
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 179
time step 26
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.999121292763848
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 179
time step 27
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900087870723615
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 179
time step 28
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900999121292764
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 179
time step 29
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9903795571366201
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 179
time step 30
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010003
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)



(0, 0, 3, 1)

Episode 181
time step 44
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9898378575812472
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 181
time step 45
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990102681744407
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 181
time step 46
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990098973182556
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 181
time step 47
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901026152849219
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 181
time step 48
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901026152849219
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 181
time step 49
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901034999402902
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 181
time step 50
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990102578863898
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Ep

(0, 0, 2, 1)

Episode 183
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.015705044121938
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 183
time step 20
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 6.657341217407035
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 183
time step 21
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 6.575324486564199
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 183
time step 22
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 6.657341217407035
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 183
time step 23
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 6.657341217407035
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 183
time step 24
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 6.376773946594405
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 183
time step 25
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990360205773853
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episod

(0, 0, 2, 1)

Episode 186
time step 12
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 6.325987305009762
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 186
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9906453373734916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 186
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 6.349549216904719
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 186
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 6.242054967685817
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 186
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0149420591516154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 186
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0149420591516154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 186
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9898505794084838
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Ep

(0, 0, 2, 0)

Episode 190
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901014942059152
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 190
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989850579409
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 190
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0423962789192487
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 190
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0397323060706436
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 190
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0397323060706436
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 190
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0397323060706436
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 190
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.039235997951996
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode

(0, 0, 2, 0)

Episode 193
time step 16
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9922280765106242
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 193
time step 17
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990120069346
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 193
time step 18
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990120069346
Explore rate 1
Learning 1
Streaks 0

Episode 193 is done after time steps 18
(0, 0, 3, 1)

Episode 194
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9905882255039931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 194
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9905882255039931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 194
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9903195729284029
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 194
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900968042707161
Explore rate 1

(0, 0, 3, 2)

Episode 197
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990106289736477
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 197
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990106289736477
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 197
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900989371026352
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 197
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901062911997969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 197
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900989371026352
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 197
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990106289735
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 197
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098937101
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epi

(0, 0, 3, 2)

Episode 199
time step 22
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9903472222139234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 199
time step 23
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099019882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 199
time step 24
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099019882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 199
time step 25
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099019882
Explore rate 1
Learning 1
Streaks 0

Episode 199 is done after time steps 25
(0, 0, 3, 1)

Episode 200
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9903472222114917
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 200
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0439200745436483
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 200
time step 2
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0439200745436483
Explore rate 

(0, 0, 2, 0)

Episode 203
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900713484860995
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 203
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900713484860995
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 203
time step 13
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9906117139494488
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 203
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0430867015539749
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 203
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.04202158621458
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 203
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0430867015539749
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 203
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9906342483238548
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

E

(0, 0, 2, 1)

Episode 205
time step 28
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9895094648227813
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 205
time step 29
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9895094648227813
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 205
time step 30
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901049053517721
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 205
time step 31
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9913077360203706
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 205
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9924696791214249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 205
time step 33
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9924696791214249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 205
time step 34
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9924696791214249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 3, 2)

Episode 208
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9911385391832317
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 208
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9911385391832317
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 208
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9911385391832317
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 208
time step 14
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9911385391832317
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 208
time step 15
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906213076530905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 208
time step 16
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906264799683919
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 208
time step 17
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906213076530905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)


(0, 0, 2, 0)

Episode 211
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990093418656
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 211
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990093418656
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 211
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099065814
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 211
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009341
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 211
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990647356045
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 211
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009341
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 211
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Epis

(0, 0, 2, 0)

Episode 213
time step 21
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039578909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 213
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039578909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 213
time step 23
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039578909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 213
time step 24
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039578909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 213
time step 25
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039578909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 213
time step 26
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039078682285
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 213
time step 27
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989609213178
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 3, 2)

Episode 216
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900988547611766
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 216
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129466990461
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 216
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129624708789
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 216
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129624708789
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 216
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9915222781117194
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 216
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9914941086315561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 216
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9914941086315561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)



(0, 0, 3, 1)

Episode 220
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9915222781117194
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 220
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9914948011027993
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 220
time step 16
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9915222781117194
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 220
time step 17
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9915222781117194
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 220
time step 18
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9914239762398649
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 220
time step 19
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900857602376012
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 220
time step 20
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901001254163424
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)


(0, 0, 2, 2)

Episode 223
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901253956757461
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 223
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901252655491246
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 223
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901252655491246
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 223
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901252655491246
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 223
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991063682625
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 223
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991063682625
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 223
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991063682625
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episod

(0, 0, 1, 0)

Episode 227
time step 9
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990120283117
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 227
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990120283117
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 227
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990120283117
Explore rate 1
Learning 1
Streaks 0

Episode 227 is done after time steps 11
(0, 0, 3, 1)

Episode 228
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901229119181971
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 228
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990098770880818
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 228
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901763882871464
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 228
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900991030529709
Explore rate 1
L

(0, 0, 2, 1)

Episode 229
time step 24
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900997649549934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 229
time step 25
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900997649549934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 229
time step 26
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900997649549934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 229
time step 27
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900997598537387
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 229
time step 28
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010865
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 229
time step 29
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990098582287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 229
time step 30
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990120283117
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)


(0, 0, 2, 1)

Episode 232
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990472892486
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 232
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990472892486
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 232
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990292581262
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 232
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990097074187
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 232
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990294100136
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 232
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990294100136
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 232
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990294100136
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episod

(0, 0, 2, 2)

Episode 235
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990593022536
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 235
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0104657426349382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 235
time step 3
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0104657426349382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 235
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900995091833458
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 235
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900995091833458
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 235
time step 6
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990049081664
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 235
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900995120087808
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 2, 0)

Episode 236
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099271845
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 236
time step 20
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099011986
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 236
time step 21
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099011986
Explore rate 1
Learning 1
Streaks 0

Episode 236 is done after time steps 21
(0, 0, 3, 1)

Episode 237
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0297687239522297
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 237
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0297687239522297
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 237
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0092103851880323
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 237
time step 3
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0094159685756743
Explore rate 1

(0, 0, 2, 1)

Episode 240
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0100603957471836
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 240
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900979886573091
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 240
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900979886573091
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 240
time step 3
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9903084985206383
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 240
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902976127282079
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 240
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.009834073629492
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 240
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.009834073629492
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 

(0, 0, 2, 1)

Episode 242
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0092479411474222
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 242
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0092479411474222
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 242
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9899151367504497
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 242
time step 11
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990087211722803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 242
time step 12
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990087211722803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 242
time step 13
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901006592048653
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 242
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900854909730794
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epi

(0, 0, 2, 1)

Episode 245
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990096878567978
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 245
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9904762825233198
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 245
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902831305043143
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 245
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9904762825233198
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 245
time step 12
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9904762825233198
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 245
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9904762825233198
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 245
time step 14
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990123921633268
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Epi

(0, 0, 3, 2)

Episode 248
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.000098050909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 248
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.000098050909914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 248
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900980223442233
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 248
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 1.000198064071707
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 248
time step 12
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 1.000198064071707
Explore rate 1
Learning 1
Streaks 0

Episode 248 is done after time steps 12
(0, 0, 2, 1)

Episode 249
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901006430861545
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 249
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989935691384
Explore rate 1
Lea

(0, 0, 2, 1)

Episode 251
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9998972107700184
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 251
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.999026490225402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 251
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.999026490225402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 251
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901008498969639
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 251
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901008498969639
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 251
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901008498969639
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 251
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901008498969639
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode

(0, 0, 2, 1)

Episode 253
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990377026304
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 253
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990377026304
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 253
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900999298261535
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 253
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990377026304
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 253
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990466238655
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 253
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990466238655
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 253
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990173058926
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 3, 1)

Episode 254
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9907345767192561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 254
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906587915710321
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 254
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9908131519730672
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 254
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906587915710321
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 254
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906587915710321
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 254
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9903635572769531
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 254
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9908131519730672
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epis

Best Q 0.9993759760951615
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 256
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9992831723123006
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 256
time step 21
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990007168276877
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 256
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901008563550597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 256
time step 23
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009923
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 256
time step 24
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009923
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 256
time step 25
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099222633
Explore rate 1
Learning 1
Streaks 0

Episode 256 is done after time steps 25
(0, 0, 3, 1)

Episode 257
time step 0
Action 1
State

(0, 0, 3, 1)

Episode 258
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9906593560900159
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 258
time step 16
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990098045936904
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 258
time step 17
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906547068111122
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 258
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906547068111122
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 258
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906547068111122
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 258
time step 20
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9906547068111122
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 258
time step 21
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.99009902361618
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Ep

(0, 0, 3, 0)

Episode 261
time step 22
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9910579923381646
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 261
time step 23
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9910579923381646
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 261
time step 24
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9910579923381646
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 261
time step 25
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9910579923381646
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 261
time step 26
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9910579923381646
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 261
time step 27
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9910430497985383
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 261
time step 28
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9910484760799148
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)


(0, 0, 3, 1)

Episode 264
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916925486001589
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 264
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916925486001589
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 264
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916925486001589
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 264
time step 7
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916925486001589
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 264
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.991033890185752
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 264
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9910404767698962
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 264
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9910404767698962
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 2, 1)

Episode 267
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9910219875795272
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 267
time step 5
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9902851844954077
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 267
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900972235031076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 267
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900972235031076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 267
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990277649688
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 267
time step 9
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9902851844954077
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 267
time step 10
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9902851844954077
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episo

(0, 0, 3, 1)

Episode 270
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099111763731
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 270
time step 13
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990088823627
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 270
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990909773665162
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 270
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901950697942055
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 270
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900894509945859
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 270
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901950697942055
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 270
time step 18
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902036681016775
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

E

(0, 0, 3, 1)

Episode 271
time step 36
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916476622795627
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 271
time step 37
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916476622795627
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 271
time step 38
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9916476622795627
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 271
time step 39
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900989065496473
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 271
time step 40
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901143941069463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 271
time step 41
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901143941069463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 271
time step 42
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990200593691525
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)



(0, 0, 2, 1)

Episode 273
time step 28
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990848778417
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 273
time step 29
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901064428744912
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 273
time step 30
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901064428744912
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 273
time step 31
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990119537550467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 273
time step 32
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901192572971207
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 273
time step 33
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901192572971207
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 273
time step 34
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901192572971207
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)



(0, 0, 3, 2)

Episode 276
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901000782921259
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 276
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901000782921259
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 276
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901000782921259
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 276
time step 13
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901000676082816
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 276
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901000677151199
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 276
time step 15
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990799386948
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 276
time step 16
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990799386948
Explore rate 1
Learning 1
Streaks 0

Episode 276 i

(0, 0, 2, 0)

Episode 279
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9908266113037356
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 279
time step 13
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099970307899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 279
time step 14
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099970307899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 279
time step 15
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099000296921
Explore rate 1
Learning 1
Streaks 0

Episode 279 is done after time steps 15
(0, 0, 3, 1)

Episode 280
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9908126960930566
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 280
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901020257086783
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 280
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900989797429132
Explore rate 1
L

(0, 0, 2, 0)

Episode 282
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900988720152719
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 282
time step 20
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901061494081567
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 282
time step 21
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901062158988032
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 282
time step 22
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901062158988032
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 282
time step 23
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901187723288092
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 282
time step 24
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901186467645089
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 282
time step 25
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990101910814
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 1, 0)

Episode 285
time step 29
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990102870928
Explore rate 1
Learning 1
Streaks 0

Episode 285 is done after time steps 29
(0, 0, 2, 1)

Episode 286
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990160773508
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 286
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.992361437499234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 286
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9923388071080352
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 286
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9923966476841519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 286
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990792394566
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 286
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990792394566
Explore rate 1
Le

(0, 0, 2, 2)

Episode 288
time step 15
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9923966476841519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 288
time step 16
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9923966476841519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 288
time step 17
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9923966476841519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 288
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990792393172
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 288
time step 19
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990792393172
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 288
time step 20
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 288
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990105943732
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 3, 2)

Episode 291
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099145828
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 291
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099145828
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 291
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099008542
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 291
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 291
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099077771
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 291
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099077771
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 291
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099080047
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Epis

(0, 0, 2, 1)

Episode 292
time step 55
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901188674529224
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 292
time step 56
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901667667717953
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 292
time step 57
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901188674529224
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 292
time step 58
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098971292
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 292
time step 59
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098971292
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 292
time step 60
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099010287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 292
time step 61
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.99029211870362
Explore rate 1
Learning 1
Streaks 0

Episode 292 is 

(0, 0, 1, 0)

Episode 296
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099010287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 296
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099010287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 296
time step 13
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9997059323901016
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 296
time step 14
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9997059323901016
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 296
time step 15
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9996098438542915
Explore rate 1
Learning 1
Streaks 0

Episode 296 is done after time steps 15
(0, 0, 2, 1)

Episode 297
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9946020509840199
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 297
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901222102033498
Explore rate

(0, 0, 1, 0)

Episode 299
time step 20
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901973783149034
Explore rate 1
Learning 1
Streaks 0

Episode 299 is done after time steps 20
(0, 0, 2, 1)

Episode 300
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9997104912801504
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 300
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901961873974894
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 300
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901961873974894
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 300
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990076991492
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 300
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999794961325
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 300
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990076991492
Explore rate 1
L

(0, 0, 2, 0)

Episode 304
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9968914835682774
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 304
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9967394893964772
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 304
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9967394893964772
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 304
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9967394893964772
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 304
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9966507925735733
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 304
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990174171189
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 304
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901654221369125
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Epis

(0, 0, 3, 2)

Episode 306
time step 6
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099001183
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 306
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009900999
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 306
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099627811
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 306
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099627811
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 306
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099621543
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 306
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099003784
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 306
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009961
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epis

(0, 0, 3, 2)

Episode 308
time step 17
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901638075230732
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 308
time step 18
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900983552565275
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 308
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099016138
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 308
time step 20
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099016138
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 308
time step 21
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901393801220515
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 308
time step 22
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901393801220515
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 308
time step 23
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901396310643036
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 1)

Episode 310
time step 31
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900994947293007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 310
time step 32
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900994876444844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 310
time step 33
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009908
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 310
time step 34
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009908
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 310
time step 35
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 310
time step 36
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010603
Explore rate 1
Learning 1
Streaks 0

Episode 310 is done after time steps 36
(0, 0, 2, 1)

Episode 311
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900984379955016
Explore rat

(0, 0, 3, 1)

Episode 312
time step 31
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999280609419
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 312
time step 32
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901903922699677
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 312
time step 33
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901903922699677
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 312
time step 34
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901903922699677
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 312
time step 35
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901894833405605
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 312
time step 36
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900981051665942
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 312
time step 37
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990189483341
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 0)

Episode 315
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099029363483
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 315
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990384878312
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 315
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099001392
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 315
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099001392
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 315
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009985
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 315
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 315
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990274568628992
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 

(0, 0, 2, 1)

Episode 318
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900999886076243
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 318
time step 10
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9992391702938843
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 318
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9991467916203394
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 318
time step 12
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9991467916203394
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 318
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9991467916203394
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 318
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9991467916203394
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 318
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900999886076243
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)



(0, 0, 4, 2)

Episode 319
time step 25
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 319
time step 26
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 319 is done after time steps 26
(0, 0, 3, 1)

Episode 320
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900088239044613
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 320
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9989381484375282
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 320
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9989381484375282
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 320
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901294880189306
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 320
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902175746231168
Explore rate 1


(0, 0, 4, 1)

Episode 323
time step 21
Action 0
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9900990836286845
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 323
time step 22
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9900990836286845
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 323
time step 23
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990113122101
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 323
time step 24
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990113122101
Explore rate 1
Learning 1
Streaks 0

Episode 323 is done after time steps 24
(0, 0, 2, 1)

Episode 324
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902809347408188
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 324
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900998887193555
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 324
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990011128064
Explore rate 

(0, 0, 3, 2)

Episode 326
time step 25
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009870838
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 326
time step 26
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099012916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 326
time step 27
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990113122101
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 326
time step 28
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990113122101
Explore rate 1
Learning 1
Streaks 0

Episode 326 is done after time steps 28
(0, 0, 2, 1)

Episode 327
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901309448639533
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 327
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901913206299064
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 327
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901907168722468
Explore rate 1

(0, 0, 2, 0)

Episode 328
time step 10
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099061098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 328
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990100816942
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 328
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990195092183
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 328
time step 13
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990195092183
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 328
time step 14
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990194131456
Explore rate 1
Learning 1
Streaks 0

Episode 328 is done after time steps 14
(0, 0, 3, 1)

Episode 329
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902758229487554
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 329
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902758229487554
Explore rate

(0, 0, 2, 0)

Episode 332
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990192301211
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 332
time step 6
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990192301211
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 332
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990188645489
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 332
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098113545
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 332
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099018865
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 332
time step 10
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009900981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 332
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098068196
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episo

(0, 0, 2, 0)

Episode 334
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990358450823
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 334
time step 23
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099010411394
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 334
time step 24
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099019318
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 334
time step 25
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099019318
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 334
time step 26
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009805
Explore rate 1
Learning 1
Streaks 0

Episode 334 is done after time steps 26
(0, 0, 2, 1)

Episode 335
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900981714804504
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 335
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990182851954
Explore rate 

(0, 0, 3, 1)

Episode 339
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990273631642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 339
time step 3
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990097263682
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 339
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990175353082
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 339
time step 5
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990176335869
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 339
time step 6
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990176335869
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 339
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990176335869
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 339
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990175353082
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 2, 0)

Episode 342
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009901932
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 342
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009901932
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 342
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009808
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 342
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 342
time step 14
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 342
time step 15
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099025762
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 342
time step 16
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

E

(0, 0, 3, 2)

Episode 345
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991873828149
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 345
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991873828149
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 345
time step 20
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990081261718
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 345
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991892258137
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 345
time step 22
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991892258137
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 345
time step 23
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991927398191
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 345
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991927046789
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 1)

Episode 348
time step 17
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900993015747388
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 348
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900993015747388
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 348
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 348
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 348
time step 21
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 348
time step 22
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 348
time step 23
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 4, 2)

Episode 350
time step 20
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990800970506
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 350
time step 21
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990800970506
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 350
time step 22
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990800970506
Explore rate 1
Learning 1
Streaks 0

Episode 350 is done after time steps 22
(0, 0, 2, 1)

Episode 351
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901169190752543
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 351
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098721232
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 351
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099012787
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 351
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991889930184
Explore rate 1

(0, 0, 2, 1)

Episode 353
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991907117436
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 353
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099196029735
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 353
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991907117436
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 353
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990080928826
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 353
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099722513
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 353
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990133926084
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 353
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990133926084
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episo

(0, 0, 2, 1)

Episode 356
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900996552862971
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 356
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098660738
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 356
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098660738
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 356
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990133231251
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 356
time step 6
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098667686
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 356
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009902027
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 356
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099362383
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode

(0, 0, 2, 1)

Episode 359
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099004077639
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 359
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099592235
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 359
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990160311107
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 359
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990160311107
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 359
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990160311107
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 359
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099592235
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 359
time step 20
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990099009647434
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

E

(0, 0, 2, 1)

Episode 360
time step 37
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990283658568
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 360
time step 38
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990283658568
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 360
time step 39
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900996544440572
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 360
time step 40
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900996544440572
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 360
time step 41
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900996544440572
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 360
time step 42
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990155598022
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 360
time step 43
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009844402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)



(0, 0, 3, 2)

Episode 364
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990117399113
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 364
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099011728
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 364
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900993975289352
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 364
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900993975289352
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 364
time step 13
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990060247106
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 364
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900993936340856
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 364
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900993936340856
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)



(0, 0, 3, 0)

Episode 368
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990117817448
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 368
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990285194597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 368
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990285194597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 368
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901027027578528
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 368
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901026104505963
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 368
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901026104505963
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 368
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901026104505963
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 2, 1)

Episode 371
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900992124472743
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 371
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900992124565777
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 371
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900992124565777
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 371
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900992124565777
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 371
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900992124565777
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 371
time step 14
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099047706
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 371
time step 15
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099047706
Explore rate 1
Learning 1
Streaks 0

Episode 371 is

(0, 0, 2, 1)

Episode 376
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098914413
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 376
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9904428051145715
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 376
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9904428051145715
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 376
time step 3
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901060333673528
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 376
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099066738
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 376
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099066738
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 376
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990112339974
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 2, 1)

Episode 379
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098942846
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 379
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990109267722
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 379
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990109267722
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 379
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.99009901092322
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 379
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098907678
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 379
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098976525
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 379
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098976525
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 

(0, 0, 3, 2)

Episode 382
time step 3
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009894
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 382
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901022101188586
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 382
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901022101188586
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 382
time step 6
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901022101188586
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 382
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009894
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 382
time step 8
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901021140105617
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 382
time step 9
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901021140105617
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episod

(0, 0, 1, 0)

Episode 385
time step 14
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 385
time step 15
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 385
time step 16
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099028704
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 385
time step 17
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 385 is done after time steps 17
(0, 0, 3, 1)

Episode 386
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990407223376
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 386
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990407223376
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 386
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990400449501
Explore rate 

(0, 0, 3, 1)

Episode 388
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099014704402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 388
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990153413491
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 388
time step 11
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900994032330024
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 388
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900993993540859
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 388
time step 13
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.99009939932803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 388
time step 14
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.99009939932803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 388
time step 15
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990060067199
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episo

(0, 0, 2, 0)

Episode 390
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99010276847066
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99010276847066
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99010276847066
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901027309487551
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989726905123
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990106483136
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009893517
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 390
t

(0, 0, 3, 2)

Episode 392
time step 33
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990231864419
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 392
time step 34
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990231864419
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 392
time step 35
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990231864419
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 392
time step 36
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990231864419
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 392
time step 37
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990105753094
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 392
time step 38
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990107014209
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 392
time step 39
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990107014209
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 3, 2)

Episode 396
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009912685
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 396
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009912685
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 396
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099125679
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 396
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990716821874
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 396
time step 12
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990716821874
Explore rate 1
Learning 1
Streaks 0

Episode 396 is done after time steps 12
(0, 0, 3, 1)

Episode 397
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901063562539425
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 397
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901063562539425
Explore rate 1
L

(0, 0, 3, 2)

Episode 398
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129658719463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 398
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129658719463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 398
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129658719463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 398
time step 16
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129658719463
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 398
time step 17
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990105188021
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 398
time step 18
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990105188021
Explore rate 1
Learning 1
Streaks 0

Episode 398 is done after time steps 18
(0, 0, 3, 1)

Episode 399
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901131068413498
Explore rat

(0, 0, 1, 0)

Episode 401
time step 22
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 401
time step 23
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 401
time step 24
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 401 is done after time steps 24
(0, 0, 3, 1)

Episode 402
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990400434724
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 402
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900989001823386
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 402
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900988711370429
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 402
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900988711370429
Explore rate 1

(0, 0, 3, 2)

Episode 405
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990107831931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 405
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990107831931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 405
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990116564787
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 405
time step 4
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990107831931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 405
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009892168
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 405
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990828388956
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 405
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990821094284
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode

(0, 0, 3, 0)

Episode 409
time step 5
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900991298126878
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 409
time step 6
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900991298126878
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 409
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099011251
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 409
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990552315545
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 409
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990552315545
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 409
time step 10
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990552315545
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 409
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990550525262
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Epis

(0, 0, 3, 2)

Episode 412
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098894102
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 412
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098894102
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 412
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099011058
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 412
time step 6
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009900989
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 412
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099355247
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 412
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099128113409
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 412
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099128113409
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 4

(0, 0, 3, 2)

Episode 414
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110879129
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 414
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110879129
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 414
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110879129
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 414
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990110878518
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 414
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110878523
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 414
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110878523
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 414
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099011076045
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episo

(0, 0, 2, 1)

Episode 418
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990132105669
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 418
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990133980789
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 418
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990133980789
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 418
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990133980789
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 418
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990132105669
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 418
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990132124421
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 418
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990132124421
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 2, 1)

Episode 421
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099479707
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 421
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099474976
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 421
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099005249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 421
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 421
time step 5
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990146458435
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 421
time step 6
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990146458435
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 421
time step 7
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990146458435
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episod

(0, 0, 4, 2)

Episode 423
time step 20
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990102082969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 423
time step 21
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990102082969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 423
time step 22
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990409513069
Explore rate 1
Learning 1
Streaks 0

Episode 423 is done after time steps 22
(0, 0, 3, 1)

Episode 424
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990400022127
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 424
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990102052234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 424
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990102052234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 424
time step 3
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098979479
Explore rate 1

(0, 0, 2, 0)

Episode 427
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009901056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 427
time step 15
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009714
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 427
time step 16
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009714
Explore rate 1
Learning 1
Streaks 0

Episode 427 is done after time steps 16
(0, 0, 3, 1)

Episode 428
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990105031935
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 428
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990105064151
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 428
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990105064151
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 428
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009299
Explore rate 1


(0, 0, 2, 0)

Episode 431
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099059193
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 431
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010378
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 431
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009896
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 431
time step 10
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099010090955
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 431
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099010090955
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 431
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099010090955
Explore rate 1
Learning 1
Streaks 0

Episode 431 is done after time steps 12
(0, 0, 2, 1)

Episode 432
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990103796836
Explore rate 1
Le

(0, 0, 3, 2)

Episode 433
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099010214
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 433
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099010212
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 433
time step 10
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099041544
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 433
time step 11
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9900991325289219
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 433
time step 12
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991312994782
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 433
time step 13
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991312994782
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 433
time step 14
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991312994782
Explore rate 1
Learning 1
Streaks 0

Episode 433 is 

(0, 0, 2, 0)

Episode 437
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9998961152914503
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 437
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900010388470855
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 437
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9995080634694289
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 437
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.999511943987649
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 437
time step 10
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9995080634694289
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 437
time step 11
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900049193653055
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 437
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999508063468
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Epis

(0, 0, 3, 2)

Episode 440
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110907881
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 440
time step 19
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009592
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 440
time step 20
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991300856763
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 440
time step 21
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991300856763
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 440
time step 22
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991300855548
Explore rate 1
Learning 1
Streaks 0

Episode 440 is done after time steps 22
(0, 0, 2, 1)

Episode 441
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.998954549440519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 441
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901009399451373
Explore rate 

(0, 0, 3, 1)

Episode 444
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990079414203
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 444
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990099287212
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 444
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990118880137
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 444
time step 5
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990118880137
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 444
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991296726649
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 444
time step 7
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991296726649
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 444
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991264879246
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episod

(0, 0, 3, 2)

Episode 447
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110553576
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 447
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098894464
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 447
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099011056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 447
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099011067249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 447
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099011067249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 447
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099011067249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 447
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099011056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episod

(0, 0, 2, 1)

Episode 450
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991750799373
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 450
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991717539946
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 450
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991717539946
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 450
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991717539946
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 450
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991717539946
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 450
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900991717539946
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 450
time step 10
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episo

(0, 0, 3, 1)

Episode 453
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991612368757
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 453
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900993045799515
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 453
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900993045799515
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 453
time step 6
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991502972762
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 453
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990084970274
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 453
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990114578567
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 453
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990128462508
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 3, 2)

Episode 455
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099306173
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 455
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099003897
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 455
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099003897
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 455
time step 13
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990114868673
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 455
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099006921
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 455
time step 15
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 455
time step 16
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)


(0, 0, 2, 1)

Episode 458
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 458
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 458
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 458
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 458
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 458
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 458
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episod

(0, 0, 3, 1)

Episode 459
time step 21
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991625015703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 459
time step 22
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991625015703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 459
time step 23
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991609908127
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 459
time step 24
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990083900918
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 459
time step 25
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099316902
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 459
time step 26
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990114422814
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 459
time step 27
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990114416666
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 1)

Episode 463
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990114109887
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 463
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990144767658
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 463
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990144767658
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 463
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990099028136102
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 463
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990110547718
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 463
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990110547718
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 463
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099014757
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode

(0, 0, 3, 1)

Episode 465
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099013185
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 465
time step 10
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990202774342
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 465
time step 11
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099020173673
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 465
time step 12
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990097186388
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 465
time step 13
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990097186388
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 465
time step 14
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990099028137
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 465
time step 15
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098991465
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

E

(0, 0, 2, 1)

Episode 467
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099009946932
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 467
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099456168
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 467
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099005438
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 467
time step 7
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990098961988
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 467
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099005307
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 467
time step 9
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990099053266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 467
time step 10
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990099053266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episod

(0, 0, 3, 0)

Episode 468
time step 39
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990099010738
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 468
time step 40
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099100309
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 468
time step 41
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098991465
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 468
time step 42
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098991465
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 468
time step 43
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099027054
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 468
time step 44
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099045319
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 468
time step 45
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099027054
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 1, 0)

Episode 472
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901960584813276
Explore rate 1
Learning 1
Streaks 0

Episode 472 is done after time steps 11
(0, 0, 2, 1)

Episode 473
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990100858196
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 473
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098991418
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 473
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099010087109
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 473
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098991418
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 473
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099010085
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 473
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Le

(0, 0, 3, 2)

Episode 474
time step 39
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099010278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 474
time step 40
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099010278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 474
time step 41
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 474
time step 42
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 474
time step 43
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009914
Explore rate 1
Learning 1
Streaks 0

Episode 474 is done after time steps 43
(0, 0, 2, 1)

Episode 475
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900999413042146
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 475
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990101741096542
Explore rate 

(0, 0, 2, 1)

Episode 477
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902701678636741
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 477
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902701678636741
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 477
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902683323302983
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 477
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902683506856318
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 477
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902683323302983
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 477
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900973166766969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 477
time step 20
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990270167862
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 2, 0)

Episode 481
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098096552
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 481
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099019036
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 481
time step 6
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009900981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 481
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990194118531
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 481
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009900981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 481
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 481
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode

(0, 0, 3, 2)

Episode 484
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099008422
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 484
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099008422
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 484
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 484
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 484
time step 13
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 484
time step 14
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 484
time step 15
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 484 is

(0, 0, 3, 1)

Episode 488
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990374239937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 488
time step 11
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901867972672518
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 488
time step 12
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901867972672518
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 488
time step 13
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990192234744
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 488
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990195973716
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 488
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990101779235
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 488
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990101779235
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 0)

Episode 490
time step 25
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099850842
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 490
time step 26
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099001499
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 490
time step 27
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009985
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 490
time step 28
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099018396
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 490
time step 29
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099922173
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 490
time step 30
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099922173
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 490
time step 31
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099922173
Explore rate 1
Learning 1
Streaks 0

Episode 490 i

(0, 0, 3, 1)

Episode 494
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991201356177
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 494
time step 9
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991201356177
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 494
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990013872755
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 494
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990013872755
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 494
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099861273
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 494
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099001388
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 494
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009985
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

E

(0, 0, 3, 1)

Episode 496
time step 31
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990183309621
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 496
time step 32
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099009848
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 496
time step 33
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099852844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 496
time step 34
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099852844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 496
time step 35
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901807440484325
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 496
time step 36
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901807440484325
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 496
time step 37
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9902642490588875
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 2)

Episode 498
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099007234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 498
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009928
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 498
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 498
time step 9
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991300886259
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 498
time step 10
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900991300886259
Explore rate 1
Learning 1
Streaks 0

Episode 498 is done after time steps 10
(0, 0, 2, 1)

Episode 499
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990256980844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 499
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990193168471754
Explore rate 1
Le

(0, 0, 2, 2)

Episode 502
time step 9
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990099272369922
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 502
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901747425561807
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 502
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901747425561807
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 502
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901747425561807
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 502
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901747425561807
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 502
time step 14
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901747425561807
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 502
time step 15
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901739852296287
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

E

(0, 0, 2, 2)

Episode 506
time step 7
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901739834355934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 506
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901739756638883
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 506
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901739756638883
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 506
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901739756638883
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 506
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990102398486044
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 506
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901016589489766
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 506
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990024033383
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epi

(0, 0, 3, 2)

Episode 509
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900999395442023
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 509
time step 9
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099008697
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 509
time step 10
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099008697
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 509
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990152068658
Explore rate 1
Learning 1
Streaks 0

Episode 509 is done after time steps 11
(0, 0, 2, 1)

Episode 510
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099946858275
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 510
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098096598
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 510
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099019180146
Explore rate 1
Le

(0, 0, 2, 0)

Episode 511
time step 48
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901717074686598
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 511
time step 49
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901717074686598
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 511
time step 50
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099932736
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 511
time step 51
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099932736
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 511
time step 52
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099000673
Explore rate 1
Learning 1
Streaks 0

Episode 511 is done after time steps 52
(0, 0, 3, 1)

Episode 512
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901944237112879
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 512
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.99009900828118
Explore rate 1

(0, 0, 2, 0)

Episode 514
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990162192611
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 514
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900997358313985
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 514
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099932454
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 514
time step 13
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099932454
Explore rate 1
Learning 1
Streaks 0

Episode 514 is done after time steps 13
(0, 0, 2, 1)

Episode 515
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900997672399228
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 515
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990036740518
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 515
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990036740518
Explore rate 

Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 518
time step 18
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990173591797
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 518
time step 19
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990098484608
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 518
time step 20
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990098484608
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 518
time step 21
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099015152
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 518
time step 22
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099540397
Explore rate 1
Learning 1
Streaks 0

Episode 518 is done after time steps 22
(0, 0, 3, 1)

Episode 519
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099010000645
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 519
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.99009900

(0, 0, 1, 0)

Episode 520
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 520
time step 13
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 520
time step 14
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 520 is done after time steps 14
(0, 0, 2, 1)

Episode 521
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990102995662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 521
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901712733214241
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 521
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901712733214241
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 521
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901712733214241
Explore rate 1

(0, 0, 3, 0)

Episode 523
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901681386756691
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 523
time step 2
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901681386756691
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 523
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099687107322
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 523
time step 4
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099687107322
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 523
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099000766
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 523
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990475768467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 523
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990475768467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 

(0, 0, 2, 1)

Episode 524
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989959094006
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 524
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099010040906
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 524
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990452640022
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 524
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990452640022
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 524
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990447704563
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 524
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990095522955
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 524
time step 11
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900997181312332
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episo

(0, 0, 4, 2)

Episode 526
time step 15
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009952
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 526
time step 16
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9900991244489863
Explore rate 1
Learning 1
Streaks 0

Episode 526 is done after time steps 16
(0, 0, 3, 1)

Episode 527
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900997367204447
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 527
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900998365208753
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 527
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900998365208753
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 527
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991014352218
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 527
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990089856476
Explore rate 1


(0, 0, 3, 2)

Episode 529
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110333108
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 529
time step 15
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990110350154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 529
time step 16
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990110350154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 529
time step 17
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990110350154
Explore rate 1
Learning 1
Streaks 0

Episode 529 is done after time steps 17
(0, 0, 3, 1)

Episode 530
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990228505617
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 530
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990341657574
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 530
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990340526055
Explore rate 

(0, 0, 2, 0)

Episode 532
time step 31
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099036455941
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 532
time step 32
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9901941371967506
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 532
time step 33
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099009900981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 532
time step 34
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099009900981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 532
time step 35
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

Episode 532 is done after time steps 35
(0, 0, 3, 1)

Episode 533
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990225942077
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 533
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901016143946273
Explore rate 1


(0, 0, 2, 0)

Episode 534
time step 45
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990195022163
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 534
time step 46
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099546088
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 534
time step 47
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099004539
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 534
time step 48
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 534
time step 49
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900980489212241
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 534
time step 50
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 534
time step 51
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 2, 1)

Episode 536
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901015119473975
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 536
time step 8
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990099029339
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 536
time step 9
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990099029339
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 536
time step 10
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990099009706
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 536
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900988873796326
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 536
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900988873796326
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 536
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099080324627
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epi

(0, 0, 3, 2)

Episode 538
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098821665
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 538
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990110171746
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 538
time step 20
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990129064666
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 538
time step 21
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990129064666
Explore rate 1
Learning 1
Streaks 0

Episode 538 is done after time steps 21
(0, 0, 3, 1)

Episode 539
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990117843087
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 539
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990128952354
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 539
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990128952354
Explore rate 

(0, 0, 3, 2)

Episode 542
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098712354
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 542
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099476457
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 542
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099769338
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 542
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099769338
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 542
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099769338
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 542
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099769338
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 542
time step 11
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990110425032
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Epis

(0, 0, 2, 1)

Episode 543
time step 54
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990128279263
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 543
time step 55
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990128577991
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 543
time step 56
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990128577991
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 543
time step 57
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990099009871422
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 543
time step 58
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990117480857
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 543
time step 59
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099002307
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 543
time step 60
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099002307
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)



(0, 0, 2, 0)

Episode 544
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099117402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 544
time step 16
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099117402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 544
time step 17
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 544
time step 18
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 544
time step 19
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990194146897
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 544
time step 20
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 544 is done after time steps 20
(0, 0, 2, 1)

Episode 545
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990116820654
Explore rat

(0, 0, 1, 0)

Episode 547
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 547
time step 13
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9901941371967595
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 547
time step 14
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901931858286648
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 547
time step 15
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901931858286648
Explore rate 1
Learning 1
Streaks 0

Episode 547 is done after time steps 15
(0, 0, 2, 1)

Episode 548
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099012845
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 548
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990110018009
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 548
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990110018009
Explore rate 

(0, 0, 2, 1)

Episode 550
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900983880779319
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 550
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990161192207
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 550
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901605783047467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 550
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901605783047467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 550
time step 18
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.990099009889982
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 550
time step 19
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901605783047467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 550
time step 20
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901605783047467
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)



(0, 0, 4, 2)

Episode 552
time step 13
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901544605634889
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 552
time step 14
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901544605634889
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 552
time step 15
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900984553943653
Explore rate 1
Learning 1
Streaks 0

Episode 552 is done after time steps 15
(0, 0, 3, 1)

Episode 553
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901528907057724
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 553
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901010607078096
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 553
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901015790077894
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 553
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099502509902
Explore rate 1


(0, 0, 3, 1)

Episode 555
time step 15
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901010970057678
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 555
time step 16
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901010703965369
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 555
time step 17
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900989892960346
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 555
time step 18
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990103731317
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 555
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991662599772
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 555
time step 20
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991647011086
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 555
time step 21
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991647011086
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 2)

Episode 558
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901001202930853
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 558
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901001202930853
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 558
time step 13
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009852
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 558
time step 14
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990210049061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 558
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990210049061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 558
time step 16
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990210049061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 558
time step 17
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990210049061
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 1)

Episode 561
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900996012848582
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 561
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900996012848582
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 561
time step 12
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900996012848582
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 561
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991043012335
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 561
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990089569877
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 561
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099104301
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 561
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900995954897163
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 0)

Episode 564
time step 14
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990041967161
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 564
time step 15
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990041967161
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 564
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.99015297556196
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 564
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901524358483074
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 564
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901524358483074
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 564
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901524358483074
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 564
time step 20
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901524358483074
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

E

(0, 0, 2, 1)

Episode 568
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990047698999
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 568
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990100080669
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 568
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990100080669
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 568
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098999192
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 568
time step 10
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 568
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099010085
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 568
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099010085
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Epi

(0, 0, 2, 1)

Episode 571
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900995336344076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 571
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901677810013438
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 571
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901670985276745
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 571
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901670985276745
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 571
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901670985276745
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 571
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990149289037
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 571
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990149289037
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episod

(0, 0, 3, 1)

Episode 574
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9510351165124882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 574
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9510351165124882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 574
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9510351165124882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 574
time step 15
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901010520555502
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 574
time step 16
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9510351165124882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 574
time step 17
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9510351165124882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 574
time step 18
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901010420563876
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 0)

Episode 576
time step 13
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900989894794445
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 576
time step 14
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990101052056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 576
time step 15
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.9416160962940248
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 576
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099014508
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 576
time step 17
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099014508
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 576
time step 18
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099019069
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 576
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099014508
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 0)

Episode 578
time step 22
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901032287400495
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 578
time step 23
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 578
time step 24
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 578
time step 25
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990520893806
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 578
time step 26
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990520893806
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 578
time step 27
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990516674968
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 578
time step 28
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 2, 1)

Episode 581
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9991470034358865
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 581
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9991470034358865
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 581
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.8858504105060356
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 581
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.8858504105060356
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 581
time step 18
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.8858504105060356
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 581
time step 19
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990194146897
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 581
time step 20
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990194146897
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 3, 2)

Episode 584
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0084721069976514
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 584
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0084721069976514
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 584
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0170587248328575
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 584
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.7452808635926762
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 584
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.7452808635926762
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 584
time step 14
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.025056048715256
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 584
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9897494395128472
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

E

(0, 0, 1, 0)

Episode 587
time step 16
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990194137478
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 587
time step 17
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990194137478
Explore rate 1
Learning 1
Streaks 0

Episode 587 is done after time steps 17
(0, 0, 3, 1)

Episode 588
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8942309794440662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 588
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8942309794440662
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 588
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.032744909953432
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 588
time step 3
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0413597706483384
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 588
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0413597706483384
Explore rate 1
L

(0, 0, 3, 0)

Episode 589
time step 12
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.9032908403096256
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 589
time step 13
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8941589224335833
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 589
time step 14
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8941589224335833
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 589
time step 15
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8941589224335833
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 589
time step 16
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.8941589224335833
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 589
time step 17
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099820126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 589
time step 18
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9991396091065283
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 2, 0)

Episode 591
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990097531146
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 591
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901006421231775
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 591
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901006421231775
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 591
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990100625948831
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 591
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989937405118
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 591
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990100625948
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 591
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990263832008
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode

(0, 0, 2, 2)

Episode 594
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0081044759711375
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 594
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9908201676515298
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 594
time step 4
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9990352592796214
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 594
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9990352592796214
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 594
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900096474072038
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 594
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900999035259279
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 594
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990009647408
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episod

(0, 0, 3, 2)

Episode 597
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.8584926044518097
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 597
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.849808668507195
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 597
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.8498955078666415
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 597
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.8498955078666415
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 597
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9815878842807801
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Episode 597
time step 13
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 1.9508455260791997
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 597
time step 14
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901009410014876
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 1)

Ep

(0, 0, 1, 0)

Episode 599
time step 26
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990660308602
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 599
time step 27
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990660308602
Explore rate 1
Learning 1
Streaks 0

Episode 599 is done after time steps 27
(0, 0, 2, 1)

Episode 600
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900996707406597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 600
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9899372249480012
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 600
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.941066110301769
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 600
time step 3
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.941066110301769
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 600
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9986937230280681
Explore rate 1
Le

(0, 0, 2, 0)

Episode 602
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902536684831098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 602
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902536684831098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 602
time step 20
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990660308602
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 602
time step 21
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990660308602
Explore rate 1
Learning 1
Streaks 0

Episode 602 is done after time steps 21
(0, 0, 3, 1)

Episode 603
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9903849660648871
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 603
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0079428458441315
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 603
time step 2
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0079428458441315
Explore rate 

(0, 0, 3, 0)

Episode 604
time step 39
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0055197420964144
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 604
time step 40
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.005323079729465
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 604
time step 41
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9899467692027053
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 604
time step 42
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901024989316423
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 604
time step 43
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990023683895
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 604
time step 44
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990023683895
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 604
time step 45
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099763161
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)



(0, 0, 3, 2)

Episode 607
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901939774269148
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 607
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901939925117387
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 607
time step 13
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990100941003
Explore rate 1
Learning 1
Streaks 0

Episode 607 is done after time steps 13
(0, 0, 3, 1)

Episode 608
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902479492087006
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 608
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900980600748825
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 608
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900995589662207
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 608
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900995589662207
Explore rate 1

(0, 0, 2, 0)

Episode 609
time step 30
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9993374341671937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 609
time step 31
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9993374341671937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 609
time step 32
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9992450297978872
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 609
time step 33
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990195107877
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 609
time step 34
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990195107877
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 609
time step 35
Action 1
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990098048921
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 609
time step 36
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990109350614
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)


(0, 0, 3, 2)

Episode 612
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901044981127076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 612
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901044981127076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 612
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901026477204764
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 612
time step 10
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990100941003
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 612
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990100941003
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 612
time step 12
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.990099009899059
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 612
time step 13
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900999896548031
Explore rate 1
Learning 1
Streaks 0

Episode 612 is do

(0, 0, 3, 1)

Episode 615
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901018209281314
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 615
time step 10
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901017998735451
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 615
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900989820012647
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 615
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990103905334
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 615
time step 13
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990382853634
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 615
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990103905334
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 615
time step 15
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990098960947
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)



(0, 0, 2, 2)

Episode 615
time step 73
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0026425729941102
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 615
time step 74
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.002645163076948
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 615
time step 75
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.002645163076948
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 615
time step 76
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0026475354868358
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 615
time step 77
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900988982734169
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 615
time step 78
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900988982734169
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 615
time step 79
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990110172658
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

E

(0, 0, 3, 2)

Episode 619
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.991490513998937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 619
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.991490513998937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 619
time step 13
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.991490513998937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 619
time step 14
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.991490513998937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 619
time step 15
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900914581160303
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 619
time step 16
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901054486748593
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 619
time step 17
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901129360720804
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epi

(0, 0, 2, 0)

Episode 622
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990838020293854
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 622
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990838020293854
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 622
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9908306301899068
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 622
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 622
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 622
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 622 is done after time steps 12
(0, 0, 3, 1)

Episode 623
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990092506486504
Explore rate 1
Le

(0, 0, 2, 0)

Episode 624
time step 35
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039805420342
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 624
time step 36
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 624
time step 37
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009994
Explore rate 1
Learning 1
Streaks 0

Episode 624 is done after time steps 37
(0, 0, 3, 1)

Episode 625
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901052072539382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 625
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990749351348
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 625
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990092506485
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 625
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901049902676402
Explore rate 1

(0, 0, 2, 0)

Episode 627
time step 29
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901040286058416
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 627
time step 30
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039320998959
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 627
time step 31
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039330649553
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 627
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901039330649553
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 627
time step 33
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989616344098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 627
time step 34
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 627
time step 35
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 3, 0)

Episode 631
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990082864009779
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 631
time step 2
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990082864009779
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 631
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901025762745532
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 631
time step 4
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098928995
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 631
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098928995
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 631
time step 6
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010711
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 631
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901071821899745
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 

(0, 0, 2, 1)

Episode 633
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901040451574012
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 633
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901122504724249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 633
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901122504724249
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 633
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901070828102994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 633
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989291718971
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 633
time step 15
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9985198317044541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 633
time step 16
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9985198317044541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)


(0, 0, 3, 2)

Episode 635
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901325009350437
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 635
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901315604170011
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 635
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901315604170011
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 635
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.99014284187527
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 635
time step 7
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900989742372546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 635
time step 8
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900989742372546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 635
time step 9
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990102576275
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 

(0, 0, 3, 2)

Episode 638
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990100241365
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 638
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990130005001
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 638
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990130005001
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 638
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990098699949
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 638
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099013001
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 638
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099009900987
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 638
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990129726301
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode

(0, 0, 2, 1)

Episode 639
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990426285765
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 639
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901823972364141
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 639
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901823972364141
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 639
time step 17
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9983511608293836
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 639
time step 18
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9982686065925133
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 639
time step 19
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900977870403884
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 639
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900977870403884
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 1, 0)

Episode 643
time step 10
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099018101
Explore rate 1
Learning 1
Streaks 0

Episode 643 is done after time steps 10
(0, 0, 2, 1)

Episode 644
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990100611547291
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 644
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901814866964205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 644
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901814866964205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 644
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901814866964205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 644
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990100611547291
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 644
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990100652403028
Explore rate 1
Lear

(0, 0, 2, 0)

Episode 646
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99009901767969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 646
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99009901767969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 646
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990502776585
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 646
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.99009901767969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 646
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009823203
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 646
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099018019
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 646
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099018019
Explore rate 1
Learning 1
Streaks 0

Episode 646 is done afte

(0, 0, 3, 2)

Episode 649
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990134833565
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 649
time step 20
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990134833565
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 649
time step 21
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.99009900990196
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 649
time step 22
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901161749503726
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 649
time step 23
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901161749503726
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 649
time step 24
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901161749503726
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 649
time step 25
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990160072385
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

E

(0, 0, 3, 1)

Episode 650
time step 29
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900992458995344
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 650
time step 30
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990766401452
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 650
time step 31
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990092335986
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 650
time step 32
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990116002579
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 650
time step 33
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990122506568
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 650
time step 34
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990122506568
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 650
time step 35
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901161740181594
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 2, 0)

Episode 652
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990115859809
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 652
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990115859809
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 652
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990115859809
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 652
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990067116888
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 652
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990067116888
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 652
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099017071
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 652
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900991684050164
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episod

(0, 0, 2, 1)

Episode 655
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099323663
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 655
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099011455
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 655
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009885
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 655
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099013023
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 655
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009885
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 655
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 655
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009932
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 2, 0)

Episode 657
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098853044
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 657
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099011469
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 657
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009885
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 657
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999696455128
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 657
time step 10
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 1.950693752219727
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 657
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 1.950693752219727
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 657
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9804930624778028
Explore rate 1
Learning 1
Streaks 0

Episode 657 is done

(0, 0, 3, 1)

Episode 660
time step 16
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900998415878264
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 660
time step 17
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900989931832427
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 660
time step 18
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900989931832427
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 660
time step 19
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990098841416
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 660
time step 20
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990098841416
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 660
time step 21
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099011586
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 660
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999600480691
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 2, 2)

Episode 663
time step 5
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900995008430604
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 663
time step 6
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900995008430604
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 663
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9035671673253645
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 663
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9035671673253645
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 663
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9035671673253645
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 663
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9035671673253645
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 663
time step 11
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.9035671673253645
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epis

(0, 0, 3, 1)

Episode 667
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0082769356133605
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 667
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0082769356133605
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 667
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0077228035144563
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 667
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990099164991592
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 667
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0077228035144563
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 667
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0014167207369422
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 667
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0014167207369422
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode

(0, 0, 2, 1)

Episode 669
time step 28
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0244171331143561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 669
time step 29
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0244171331143561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 669
time step 30
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 669
time step 31
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0065786759932758
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 669
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0406380532228705
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 669
time step 33
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0406380532228705
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 669
time step 34
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0406380532228705
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 4, 2)

Episode 670
time step 22
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990007634327
Explore rate 1
Learning 1
Streaks 0

Episode 670 is done after time steps 22
(0, 0, 3, 1)

Episode 671
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.734241562590473
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 671
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9965182697859774
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 671
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900348173021403
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 671
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900996518269785
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 671
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0075410709346135
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 671
time step 5
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0075465488772541
Explore rate 1
Le

(0, 0, 2, 0)

Episode 674
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.8938909669409214
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 674
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9992289373029042
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 674
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0081755575992841
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 674
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0081755575992841
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 674
time step 13
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0486145784957266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 674
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0486145784957266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 674
time step 15
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0299528751114853
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)



(0, 0, 4, 2)

Episode 677
time step 10
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009992
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 677
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009992
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 677
time step 12
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

Episode 677 is done after time steps 12
(0, 0, 2, 1)

Episode 678
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900998701903907
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 678
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900998615865642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 678
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990013841344
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 678
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099861586
Explore rate 1

(0, 0, 4, 2)

Episode 680
time step 13
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099019036
Explore rate 1
Learning 1
Streaks 0

Episode 680 is done after time steps 13
(0, 0, 2, 1)

Episode 681
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9978847330501999
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 681
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990019688829
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 681
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.7832304787657205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 681
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.7832304787657205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 681
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.7832304787657205
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 681
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.78322627043997
Explore rate 1
Lea

(0, 0, 3, 1)

Episode 682
time step 37
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9897498723348466
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 682
time step 38
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901025012766516
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 682
time step 39
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9977976503099066
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 682
time step 40
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9977976503099066
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 682
time step 41
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9977171725301561
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 682
time step 42
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9977179773079534
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 682
time step 43
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9977179773079534
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 2, 0)

Episode 685
time step 12
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999235008427
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 685
time step 13
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901034070864292
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 685
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9898157826668406
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 685
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999235008427
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 685
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999235008427
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 685
time step 17
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901903240145045
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 685
time step 18
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9901903240145045
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 1, 0)

Episode 687
time step 14
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990190324015
Explore rate 1
Learning 1
Streaks 0

Episode 687 is done after time steps 14
(0, 0, 3, 1)

Episode 688
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9987192691150417
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 688
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902407692362627
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 688
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9987192691150417
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 688
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9987192691150417
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 688
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900128073088497
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 688
time step 5
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900998719269114
Explore rate 1
L

(0, 0, 1, 0)

Episode 689
time step 23
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900999979915914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 689
time step 24
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900999979915914
Explore rate 1
Learning 1
Streaks 0

Episode 689 is done after time steps 24
(0, 0, 2, 1)

Episode 690
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9983310131933989
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 690
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901046218866567
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 690
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9983310131933989
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 690
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9983310131933989
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 690
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990016689868066
Explore rate 1
L

(0, 0, 3, 1)

Episode 692
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0220176628561366
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 692
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0220176628561366
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 692
time step 5
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901872231376156
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 692
time step 6
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0220176628561366
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 692
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901814288234236
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 692
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9981625921176835
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 692
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9981625921176835
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 2, 0)

Episode 695
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0060625827939154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 695
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0060625827939154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 695
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0060625827939154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 695
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0060625827939154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 695
time step 8
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900999789792742
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 695
time step 9
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900999789792742
Explore rate 1
Learning 1
Streaks 0

Episode 695 is done after time steps 9
(0, 0, 3, 1)

Episode 696
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0201930219769983
Explore rate 1
Lea

(0, 0, 3, 1)

Episode 698
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0213161347233033
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 698
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0161798007230838
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 698
time step 13
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9894927102861111
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 698
time step 14
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0161798007230838
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 698
time step 15
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0165252924297419
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 698
time step 16
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.04299342038528
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 698
time step 17
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.04299342038528
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Epi

(0, 0, 2, 0)

Episode 701
time step 19
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0061391840020941
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 701
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0219423647478834
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 701
time step 21
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0219423647478834
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 701
time step 22
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0055837570983104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 701
time step 23
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9916835932581853
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 701
time step 24
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0055837570983104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 701
time step 25
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0055837570983104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 0)

Episode 704
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009748
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 704
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990227427704
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 704
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009748
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 704
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 704
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 704
time step 13
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 704
time step 14
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 704 is 

(0, 0, 1, 0)

Episode 707
time step 28
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990195917729
Explore rate 1
Learning 1
Streaks 0

Episode 707 is done after time steps 28
(0, 0, 3, 1)

Episode 708
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900960399649266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 708
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990350275733
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 708
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990050769469
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 708
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099005462
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 708
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099005462
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 708
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009945
Explore rate 1
L

(0, 0, 3, 2)

Episode 711
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991393062717
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 711
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991393062717
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 711
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991393062717
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 711
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990091109207
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 711
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990104128744
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 711
time step 10
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901913084588903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 711
time step 11
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901913084588903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Epis

(0, 0, 3, 2)

Episode 715
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990187681019421
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 715
time step 20
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901885857263324
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 715
time step 21
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990187681019421
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 715
time step 22
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900981231898058
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 715
time step 23
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990187681018
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 715
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009812319
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 715
time step 25
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901912812308251
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Ep

(0, 0, 2, 1)

Episode 715
time step 83
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902464051382578
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 715
time step 84
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 715
time step 85
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 715
time step 86
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 715
time step 87
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098040823
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 715
time step 88
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990098040823
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 715
time step 89
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9995157004756106
Explore rate 1
Learning 1
Streaks 0

Episode 715 i

(0, 0, 3, 2)

Episode 718
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901008285431279
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 718
time step 9
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901885942434587
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 718
time step 10
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901885942434587
Explore rate 1
Learning 1
Streaks 0

Episode 718 is done after time steps 10
(0, 0, 3, 1)

Episode 719
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0046446659619734
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 719
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901876984000197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 719
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0046446659619734
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 719
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0044973907592258
Explore rate 1
L

(0, 0, 2, 0)

Episode 722
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9992855903737623
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 722
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9907978777575981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 722
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9907978777575981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 722
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900962771652981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 722
time step 5
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0039283383932107
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 722
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0037857618380577
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 722
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9989006807144161
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 1, 0)

Episode 724
time step 29
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9994215336667722
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 724
time step 30
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9994215336667722
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 724
time step 31
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900057846633323
Explore rate 1
Learning 1
Streaks 0

Episode 724 is done after time steps 31
(0, 0, 3, 1)

Episode 725
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9991415506020316
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 725
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9991415506020316
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 725
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9906398365054314
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 725
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900936016349458
Explore rate 1

(0, 0, 3, 1)

Episode 727
time step 26
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9990184211790314
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 727
time step 27
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9989292267424068
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 727
time step 28
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900107077325759
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 727
time step 29
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9902296170707579
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 727
time step 30
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902265360330098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 727
time step 31
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990094653601922
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 727
time step 32
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.999329231063113
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

E

(0, 0, 2, 1)

Episode 731
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900976413560951
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 731
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990235864391
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 731
time step 5
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0039283383932107
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 731
time step 6
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0037900452451431
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 731
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990010707731
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 731
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990010707731
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 731
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9988696300154718
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 2, 1)

Episode 734
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.003254061402126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 734
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900029759906377
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 734
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900029759906377
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 734
time step 17
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900999702400937
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 734
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9903643868859109
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 734
time step 19
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9903643868859109
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 734
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9903607727769581
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)



(0, 0, 3, 2)

Episode 737
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.992650316692324
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 737
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.992650316692324
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 737
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.992650316692324
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 737
time step 6
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.992650316692324
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 737
time step 7
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990193091104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 737
time step 8
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990193091104
Explore rate 1
Learning 1
Streaks 0

Episode 737 is done after time steps 8
(0, 0, 3, 1)

Episode 738
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9926516079119847
Explore rate 1
Learnin

(0, 0, 3, 1)

Episode 741
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9902678477265274
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 741
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902664145720914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 741
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902664145720914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 741
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902664145720914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 741
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902664145720914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 741
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902664145720914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 741
time step 10
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9902664145720914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episo

(0, 0, 3, 2)

Episode 744
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900980396442265
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 744
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990196035577
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 744
time step 9
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 744
time step 10
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

Episode 744 is done after time steps 10
(0, 0, 3, 1)

Episode 745
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9438363105759413
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 745
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900130498531994
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 745
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9438363105759413
Explore rate 1
L

(0, 0, 3, 1)

Episode 748
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9899539709491638
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 748
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.999512158050448
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 748
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9899539709491638
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 748
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009996
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 748
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9990917657872953
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 748
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0085585749401982
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 748
time step 8
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0085585749401982
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode

(0, 0, 2, 1)

Episode 750
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902386393716323
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 750
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902386393716323
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 750
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903314039763655
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 750
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903314039763655
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 750
time step 20
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901894505649698
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 750
time step 21
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901007789131264
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 750
time step 22
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901016656296449
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 2, 0)

Episode 752
time step 24
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900973429103361
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 752
time step 25
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990270775805
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 752
time step 26
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990270775805
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 752
time step 27
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990097292242
Explore rate 1
Learning 1
Streaks 0

Episode 752 is done after time steps 27
(0, 0, 3, 1)

Episode 753
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900999433440225
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 753
time step 1
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.008665346639268
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 753
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0084796926063155
Explore rate 1

(0, 0, 1, 1)

Episode 754
time step 28
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990001034653
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 754
time step 29
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990001034653
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 754
time step 30
Action 1
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990099989653
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 754
time step 31
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9902559606103718
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 754
time step 32
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990097292242
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 754
time step 33
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990097292242
Explore rate 1
Learning 1
Streaks 0

Episode 754 is done after time steps 33
(0, 0, 3, 1)

Episode 755
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.921817852095005
Explore rate

(0, 0, 3, 2)

Episode 758
time step 16
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009809656
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 758
time step 17
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099019036
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 758
time step 18
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.99009900019839
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 758
time step 19
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.99009900019839
Explore rate 1
Learning 1
Streaks 0

Episode 758 is done after time steps 19
(0, 0, 3, 1)

Episode 759
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.9035302449160731
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 759
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9995111401071894
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 759
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9995111401071894
Explore rate 1
Lea

(0, 0, 2, 0)

Episode 762
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 762
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 762
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990254589717
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 762
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099009900973
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 762
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.990099009900973
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 762
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990273463766
Explore rate 1
Learning 1
Streaks 0

Episode 762 is done after time steps 12
(0, 0, 2, 1)

Episode 763
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902864157618896
Explore rate 1
L

(0, 0, 2, 1)

Episode 763
time step 57
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901903815940536
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 763
time step 58
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901894677215597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 763
time step 59
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901894677215597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 763
time step 60
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901894677215597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 763
time step 61
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901894677215597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 763
time step 62
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901894677215597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 763
time step 63
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901894677215597
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 3, 0)

Episode 765
time step 27
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.0355060076916809
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 765
time step 28
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0350554582883988
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 765
time step 29
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0350554582883988
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 765
time step 30
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098992104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 765
time step 31
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0350554582883988
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 765
time step 32
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0350554582883988
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 765
time step 33
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9896494454171159
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)


(0, 0, 3, 2)

Episode 767
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0345426186107625
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 767
time step 22
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.03416828625278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 767
time step 23
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098992104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 767
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990098992104
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 767
time step 25
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099010079
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 767
time step 26
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900997892132153
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 767
time step 27
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099010079
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

E

(0, 0, 3, 2)

Episode 770
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0337128512646876
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 770
time step 9
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9896628714873534
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 770
time step 10
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901033712851266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 770
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9905437773909616
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 770
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901033712851266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 770
time step 13
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.990099019506574
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 770
time step 14
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.990099019506574
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Epi

(0, 0, 2, 1)

Episode 773
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.040894727866157
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 773
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.040894727866157
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 773
time step 13
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.033701849190801
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 773
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901036023245657
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 773
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901036023245657
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 773
time step 16
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901036023245657
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 773
time step 17
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990374021112
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Ep

(0, 0, 3, 2)

Episode 775
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.99037121043109
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 775
time step 22
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.99037121043109
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 775
time step 23
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.99037121043109
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 775
time step 24
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0328341091154702
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 775
time step 25
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 1.0324094801286265
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 775
time step 26
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901772029604266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 775
time step 27
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901772029604266
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episo

(0, 0, 3, 2)

Episode 778
time step 12
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9905213984643901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 778
time step 13
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9905171745787289
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 778
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9905868622730793
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 778
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9905868622730793
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 778
time step 16
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900941313772693
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 778
time step 17
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990586862273
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 778
time step 18
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901736260039313
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)


(0, 0, 2, 0)

Episode 779
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900983293602641
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 779
time step 17
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990167063974
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 779
time step 18
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990100736997
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 779
time step 19
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9902543910053028
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 779
time step 20
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9902543910053028
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 779
time step 21
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900974560899469
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 1)

Episode 779
time step 22
Action 0
State (0, 0, 1, 1)
Reward 1.0
Best Q 0.9900990254391007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 2)


(0, 0, 3, 1)

Episode 781
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990109813348
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 781
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990109813348
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 781
time step 15
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990170113364
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 781
time step 16
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990170113364
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 781
time step 17
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990158598884
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 781
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900996049351933
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 781
time step 19
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900995990444403
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 0)

Episode 783
time step 22
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.989923784580609
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 783
time step 23
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0077899906135679
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 783
time step 24
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0077899906135679
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 783
time step 25
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900888231644605
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 783
time step 26
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900888231644605
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 783
time step 27
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0077883061268231
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 783
time step 28
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0077883061268231
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)



(0, 0, 2, 2)

Episode 786
time step 11
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0074807645673176
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 786
time step 12
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9899251923543269
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 786
time step 13
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0074738474089566
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 786
time step 14
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.0074739165805402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 786
time step 15
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0073085307716438
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 786
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0073085307716438
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 786
time step 17
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0405140642535406
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 1)

Episode 789
time step 16
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.171647542010098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 789
time step 17
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.171647542010098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 789
time step 18
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9882835245798989
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 789
time step 19
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.1508399337069806
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 789
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.5317556981766827
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 789
time step 21
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 2.3553901314049037
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 789
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9751882597397703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

E

(0, 0, 1, 0)

Episode 792
time step 9
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9902744995789916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 792
time step 10
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9902744995789916
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 792
time step 11
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9904517441772085
Explore rate 1
Learning 1
Streaks 0

Episode 792 is done after time steps 11
(0, 0, 2, 1)

Episode 793
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.1508399337069806
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 793
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9993395544337331
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 793
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9993395544337331
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 793
time step 3
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9903647172041765
Explore rate 1


(0, 0, 2, 1)

Episode 796
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9919978418823721
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 796
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9919978418823721
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 796
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9919790489590703
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 796
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.1869664729130167
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 796
time step 4
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.1869664729130167
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 796
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9935011182502402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 796
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.0954663816054278
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 2, 0)

Episode 799
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.72762263733048
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 799
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 2.7102474029550176
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 799
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.72762263733048
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 799
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990080021579
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 799
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.72762263733048
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 799
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 2.7102474029550176
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 799
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 2.7102474029550176
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 799


(0, 0, 4, 2)

Episode 800
time step 19
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009803
Explore rate 1
Learning 1
Streaks 0

Episode 800 is done after time steps 19
(0, 0, 2, 1)

Episode 801
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098907814
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 801
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0240056082766156
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 801
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0240056082766156
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 801
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0405960230238251
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 801
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0404301188763534
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 801
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0404301188763534
Explore rate 1
L

(0, 0, 2, 1)

Episode 806
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902531678504196
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 806
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902594942498968
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 806
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901037314569783
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 806
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9909317013865497
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 806
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9909317013865497
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 806
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9909317013865497
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 806
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9909317013865497
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Epi

(0, 0, 2, 1)

Episode 809
time step 12
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.006954139446869
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 809
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9970798403478656
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 809
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9971785833388556
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 809
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.0045694634127416
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 809
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9973451854397586
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 809
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9973451854397586
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 809
time step 18
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 3.671160219424148
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

E

(0, 0, 2, 1)

Episode 812
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.007367851863673
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 812
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902692398126551
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 812
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902692398126551
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 812
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9906098163095174
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 812
time step 4
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9902692398126551
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 812
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900973076018733
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 812
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9729986955508554
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode

(0, 0, 3, 1)

Episode 815
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.6517594042899395
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 815
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 3.60163320645493
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 815
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 3.60163320645493
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 815
time step 7
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 3.5300391272380973
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 815
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 3.5300391272380973
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 815
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990101790510084
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 815
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.6517594042899395
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 81

(0, 0, 3, 0)

Episode 817
time step 15
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9902693205731616
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 817
time step 16
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900973067942684
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 817
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 3.2966781738231155
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 817
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0143067169521918
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 817
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9658204988463082
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 817
time step 20
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9658204988463082
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 817
time step 21
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990341795011537
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)



(0, 0, 2, 1)

Episode 817
time step 78
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900281061699987
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 817
time step 79
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900997189382998
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 817
time step 80
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901713281314211
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 817
time step 81
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0346931871052256
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 817
time step 82
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0346931871052256
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 817
time step 83
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0346931871052256
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 817
time step 84
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0346931871052256
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 3, 1)

Episode 818
time step 53
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.5678857011667722
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 818
time step 54
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9932174382067065
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 818
time step 55
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9932174382067065
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 818
time step 56
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9932174382067065
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 818
time step 57
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9753145411776183
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 818
time step 58
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9931847215174381
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 818
time step 59
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9931847215174381
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 0)

Episode 821
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901051545462844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 821
time step 11
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901051545462844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 821
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0336632675977213
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 821
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0336632675977213
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 821
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905458665940456
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 821
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900945413340596
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 821
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905302285966961
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)


(0, 0, 2, 1)

Episode 824
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0328681826013855
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 824
time step 5
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9971826294290365
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 824
time step 6
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9971826294290365
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 824
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.99753948496076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 824
time step 8
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9969870066762272
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 824
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905259855322914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 824
time step 10
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905259855322914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode

(0, 0, 2, 1)

Episode 825
time step 35
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990084622101
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 825
time step 36
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9968513380852291
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 825
time step 37
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.03085493993595
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 825
time step 38
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.03085493993595
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 825
time step 39
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9896914506006402
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 825
time step 40
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901030854939936
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 825
time step 41
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9907073650222766
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Epi

(0, 0, 2, 0)

Episode 826
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9905251145993659
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 826
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905209513874931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 826
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905209513874931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 826
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905209513874931
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 826
time step 5
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9905173936198999
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 826
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990094826063801
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 826
time step 7
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 1.023695052451361
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 

(0, 0, 1, 0)

Episode 828
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 828 is done after time steps 12
(0, 0, 2, 1)

Episode 829
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989685034247
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 829
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9915949284107197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 829
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9915949284107197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 829
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9915949284107197
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 829
time step 4
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990108857667757
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 829
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990108857667757
Explore rate 1
Lea

State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009925
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 833
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 833
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 833
time step 7
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 833
time step 8
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 833 is done after time steps 8
(0, 0, 3, 1)

Episode 834
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.99090224957917
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 834
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.991621365164995
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 834
time 

(0, 0, 3, 2)

Episode 835
time step 46
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009617
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 835
time step 47
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900993003493255
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 835
time step 48
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.99008378634835
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 835
time step 49
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.99008378634835
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 835
time step 50
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991621365165
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 835
time step 51
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901362264314844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 835
time step 52
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901362264314844
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Epi

(0, 0, 2, 0)

Episode 838
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900995413726215
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 838
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900995413726215
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 838
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099546708064
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 838
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099546708064
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 838
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0074440517273473
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 838
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0074440517273473
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 838
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0072700645020098
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode

(0, 0, 3, 2)

Episode 839
time step 37
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990100547760451
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 839
time step 38
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901020695619642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 839
time step 39
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901020695619642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 839
time step 40
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901020695619642
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 839
time step 41
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901005011058936
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 839
time step 42
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901020544924628
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 839
time step 43
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901005328416446
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)



(0, 0, 3, 0)

Episode 842
time step 6
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.990117319000519
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 842
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 842
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 842
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 842
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 842
time step 11
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 842
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9906072850811867
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Epis

Streaks 0

(0, 0, 3, 2)

Episode 845
time step 12
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099012064
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 845
time step 13
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099038334
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 845
time step 14
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099038334
Explore rate 1
Learning 1
Streaks 0

Episode 845 is done after time steps 14
(0, 0, 2, 1)

Episode 846
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990423089941713
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 846
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9907435665072626
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 846
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9907435665072626
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 846
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901040418287317
Expl

(0, 0, 1, 0)

Episode 848
time step 34
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 848
time step 35
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

Episode 848 is done after time steps 35
(0, 0, 2, 1)

Episode 849
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9903920354285751
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 849
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990084402803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 849
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990084402803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 849
time step 3
Action 1
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990099155971
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 849
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099035909
Explore rate 1


(0, 0, 3, 1)

Episode 852
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991284070759
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 852
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901021276106448
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 852
time step 3
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901021276106448
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 852
time step 4
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901019779274622
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 852
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901019794242942
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 852
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901019794242942
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 852
time step 7
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900992433112281
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episod

(0, 0, 2, 1)

Episode 854
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989516065248
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 854
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901047800795566
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 854
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990108843701
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 854
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990108843701
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 854
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990691690015
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 854
time step 9
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901048985606589
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 854
time step 10
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901048402667423
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episo

(0, 0, 3, 2)

Episode 857
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990832788332
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 857
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901027179012891
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 857
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904662185530546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 857
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904662185530546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 857
time step 10
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904662185530546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 857
time step 11
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904662185530546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 857
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904662185530546
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Epi

(0, 0, 2, 0)

Episode 860
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990119632951
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 860
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099013001541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 860
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099013001541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 860
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099013001541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 860
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099013001541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 860
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 860
time step 20
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Epi

(0, 0, 2, 1)

Episode 863
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990466178639
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 863
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9904570059314435
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 863
time step 2
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9904570059314435
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 863
time step 3
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901097736257247
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 863
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990426799232
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 863
time step 5
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990426799232
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 863
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990095732007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episod

(0, 0, 3, 2)

Episode 864
time step 37
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990095409972
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 864
time step 38
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099045899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 864
time step 39
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901097411781448
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 864
time step 40
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990462640811
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 864
time step 41
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990462640811
Explore rate 1
Learning 1
Streaks 0

Episode 864 is done after time steps 41
(0, 0, 3, 1)

Episode 865
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904519156940632
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 865
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904519156940632
Explore rate

(0, 0, 3, 0)

Episode 866
time step 23
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901070397231253
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 866
time step 24
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900989296027687
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 866
time step 25
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990107039722
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 866
time step 26
Action 0
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990910007605
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 866
time step 27
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098940505
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 866
time step 28
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990098940505
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 866
time step 29
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010596
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 0)

Episode 869
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010245
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 869
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 869
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 869
time step 10
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010234
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 869
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010232
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 869
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 869
time step 13
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 869 is d

(0, 0, 2, 1)

Episode 872
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901006884193007
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 872
time step 1
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900974689134556
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 872
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009904623
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 872
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009904623
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 872
time step 4
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009904623
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 872
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099045868
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 872
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009541
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 8

(0, 0, 4, 2)

Episode 874
time step 14
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099046272
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 874
time step 15
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099046272
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 874
time step 16
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009537
Explore rate 1
Learning 1
Streaks 0

Episode 874 is done after time steps 16
(0, 0, 2, 1)

Episode 875
time step 0
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901021417226723
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 875
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099066613
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 875
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099009335
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 875
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990100476825
Explore rate 1

(0, 0, 3, 2)

Episode 877
time step 29
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990116321395
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 877
time step 30
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990116321395
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 877
time step 31
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 877
time step 32
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

Episode 877 is done after time steps 32
(0, 0, 2, 1)

Episode 878
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990100363753
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 878
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 878
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099023441
Explore rate 

(0, 0, 2, 1)

Episode 880
time step 25
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901004988726139
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 880
time step 26
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990101803291
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 880
time step 27
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901004988726139
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 880
time step 28
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901004837063803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 880
time step 29
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901004837063803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 880
time step 30
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901004837063803
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 880
time step 31
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)


(0, 0, 4, 2)

Episode 884
time step 25
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 884
time step 26
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

Episode 884 is done after time steps 26
(0, 0, 3, 1)

Episode 885
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990100162479
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 885
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099051334
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 885
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099009488
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 885
time step 3
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099021019
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 885
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099021321
Explore rate 1



(0, 0, 3, 1)

Episode 887
time step 31
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099024469
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 887
time step 32
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009900975
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 887
time step 33
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.990099009900975
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 887
time step 34
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 887
time step 35
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099009901009
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 887
time step 36
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099009901009
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 887
time step 37
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099009899
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Ep

(0, 0, 3, 0)

Episode 889
time step 16
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9900990101786482
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 889
time step 17
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990101758715
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 889
time step 18
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990101758715
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 889
time step 19
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 889
time step 20
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 889
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009908
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 889
time step 22
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 2, 1)

Episode 892
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990101058154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 892
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010316
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 892
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010316
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 892
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010316
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 892
time step 12
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010316
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 892
time step 13
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009903
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 892
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990101058154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

E

(0, 0, 4, 2)

Episode 894
time step 19
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9904663880552764
Explore rate 1
Learning 1
Streaks 0

Episode 894 is done after time steps 19
(0, 0, 3, 1)

Episode 895
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9908155893799073
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 895
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901099827804449
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 895
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900989001721956
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 895
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904519472534794
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 895
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904413607166715
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 895
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
L

Best Q 0.990112991278657
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 898
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990112991278657
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 898
time step 8
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990112991278657
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 898
time step 9
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990094994933
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 898
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 898
time step 11
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 898
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 898 is done after time steps 12
(0, 0, 2, 1)

Episode 899
time step 0
Action 1
State (0, 

(0, 0, 2, 0)

Episode 901
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009914
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 901
time step 15
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 901
time step 16
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 901 is done after time steps 16
(0, 0, 2, 1)

Episode 902
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099008630154
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 902
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099136984
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 902
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990402022649
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 902
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990402022649
Explore rate 1


(0, 0, 3, 2)

Episode 904
time step 11
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099253211
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 904
time step 12
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901150582112253
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 904
time step 13
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901150582112253
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 904
time step 14
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900988494178877
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 904
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990099011505821
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 904
time step 16
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9901152209029966
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 904
time step 17
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901150588090248
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)



(0, 0, 2, 1)

Episode 906
time step 7
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900992026470636
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 906
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900992006714193
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 906
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900992006911757
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 906
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900992006911757
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 906
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990080128447
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 906
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 906
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Ep

(0, 0, 2, 0)

Episode 910
time step 3
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099009901046
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 910
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 910
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 910
time step 6
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 910
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009905
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 910
time step 8
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099010134
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 910
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099010127
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode

(0, 0, 3, 2)

Episode 913
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904516472530684
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 913
time step 4
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904516472530684
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 913
time step 5
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901295404079107
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 913
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900987045959209
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 913
time step 7
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901022340224923
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 913
time step 8
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904326956540619
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 913
time step 9
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904326956540619
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episod

(0, 0, 2, 2)

Episode 915
time step 3
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9904480399307083
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 915
time step 4
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9904480399307083
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 915
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904483165187239
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 915
time step 6
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904483165187239
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 915
time step 7
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904483165187239
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 915
time step 8
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904483165187239
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 915
time step 9
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990005329935
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episod

(0, 0, 1, 0)

Episode 917
time step 37
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 917
time step 38
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 917 is done after time steps 38
(0, 0, 3, 1)

Episode 918
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904485459186103
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 918
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904482748429468
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 918
time step 2
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9904482748429468
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 918
time step 3
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900955172515706
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 918
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9904482775537036
Explore rate 1


(0, 0, 2, 1)

Episode 920
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900958914651563
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 920
time step 17
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900958914651563
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 920
time step 18
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990099008664056
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 920
time step 19
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901050909027969
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 920
time step 20
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990410699214189
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 920
time step 21
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990410699214189
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 920
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.990410699214189
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Epi

(0, 0, 2, 1)

Episode 923
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901330978856095
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 923
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901330978856095
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 923
time step 13
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901021540228727
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 923
time step 14
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989784597714
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 923
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900993196540298
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 923
time step 16
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900982091844932
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 923
time step 17
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989784597714
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)


(0, 0, 3, 2)

Episode 924
time step 27
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990768638551
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 924
time step 28
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901025378296167
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 924
time step 29
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901025378296167
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 924
time step 30
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9901025378296167
Explore rate 1
Learning 1
Streaks 0

Episode 924 is done after time steps 30
(0, 0, 2, 1)

Episode 925
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901057735441978
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 925
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9906971562579712
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 925
time step 2
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9906971562579712
Explore rate 

(0, 0, 3, 1)

Episode 929
time step 15
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901026104054462
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 929
time step 16
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990089482038
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 929
time step 17
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900954084532756
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 929
time step 18
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900954084532756
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 929
time step 19
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901013267814369
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 929
time step 20
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900989867321857
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 929
time step 21
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901025884115882
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)


(0, 0, 4, 2)

Episode 931
time step 18
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099045184
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 931
time step 19
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099009548
Explore rate 1
Learning 1
Streaks 0

Episode 931 is done after time steps 19
(0, 0, 2, 1)

Episode 932
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990103786558
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 932
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098962135
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 932
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990099010091327
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 932
time step 3
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098990867
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 932
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990099038824
Explore rate 1
L

(0, 0, 2, 1)

Episode 934
time step 16
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901047800147993
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 934
time step 17
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901085191655346
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 934
time step 18
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901084246556162
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 934
time step 19
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901084246556162
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 934
time step 20
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901084246556162
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 934
time step 21
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901025603942608
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 934
time step 22
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901047805976391
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 3, 1)

Episode 937
time step 18
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900992202030559
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 937
time step 19
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900991164612352
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 937
time step 20
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990088353876
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 937
time step 21
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900991164721087
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 937
time step 22
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990451802765
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 937
time step 23
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990451802765
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 937
time step 24
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990451802765
Explore rate 1
Learning 1
Streaks 0

Episode 937 i

(0, 0, 2, 0)

Episode 939
time step 37
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990121350972
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 939
time step 38
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 939
time step 39
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 939 is done after time steps 39
(0, 0, 3, 1)

Episode 940
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990099112212
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 940
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901049578532073
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 940
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901049578532073
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 940
time step 3
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9906578532945607
Explore rate 1

(0, 0, 2, 1)

Episode 942
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990104487828
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 942
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104432981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 942
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104432981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 942
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104432981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 942
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104432981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 942
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990104432981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 942
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episo

(0, 0, 2, 1)

Episode 945
time step 11
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9902852008206084
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 945
time step 12
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989910938254
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 945
time step 13
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990100890619
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 945
time step 14
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098991094
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 945
time step 15
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901008718083242
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 945
time step 16
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901106681403038
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 945
time step 17
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901106681403038
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)


(0, 0, 1, 0)

Episode 948
time step 19
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 948
time step 20
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 948 is done after time steps 20
(0, 0, 3, 1)

Episode 949
time step 0
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990641356508511
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 949
time step 1
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.990093586434915
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 949
time step 2
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990646732133
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 949
time step 3
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990093532678
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 949
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901044328248201
Explore rate 1
Le

(0, 0, 1, 0)

Episode 952
time step 12
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

Episode 952 is done after time steps 12
(0, 0, 3, 1)

Episode 953
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9911571201444744
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 953
time step 1
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900989409345429
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 953
time step 2
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9911571201444744
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 953
time step 3
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.991157014343909
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 953
time step 4
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9911571201444744
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 953
time step 5
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900989409345429
Explore rate 1
Le

(0, 0, 2, 1)

Episode 956
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990644777179
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 956
time step 7
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9901091872588299
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 956
time step 8
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990109085825396
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 956
time step 9
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990109085825396
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 956
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 956
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990109085825396
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 956
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901089856134202
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episod

(0, 0, 2, 1)

Episode 959
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901085325548866
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 959
time step 10
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.990098914674451
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 959
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901085419463329
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 959
time step 12
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901085419463329
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 959
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901085419463329
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 959
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989229248596
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 959
time step 15
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990191150743
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

E

(0, 0, 3, 2)

Episode 962
time step 16
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990117640929
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 962
time step 17
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990117640929
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 962
time step 18
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010603
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 962
time step 19
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099010603
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 962
time step 20
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099045524
Explore rate 1
Learning 1
Streaks 0

Episode 962 is done after time steps 20
(0, 0, 3, 1)

Episode 963
time step 0
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990117624073
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 963
time step 1
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9900990098823759
Explore rate

(0, 0, 2, 0)

Episode 965
time step 27
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 965
time step 28
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099011697
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 965
time step 29
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 965
time step 30
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 965
time step 31
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098923883
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 965
time step 32
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900990098923883
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 965
time step 33
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)


(0, 0, 2, 2)

Episode 967
time step 19
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900992100717414
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 967
time step 20
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900992100717414
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 967
time step 21
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9900990138866641
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 967
time step 22
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 967
time step 23
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009981
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 967
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 967
time step 25
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9900990099009901
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 3, 1)

Episode 969
time step 19
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.745269299615929
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 969
time step 20
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.745269299615929
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 969
time step 21
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 4.4142318477042135
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 969
time step 22
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 4.4142318477042135
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 969
time step 23
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.6248202296112426
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 969
time step 24
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 4.2941342397946185
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 969
time step 25
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.6248202296112426
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

E

State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0246805276090651
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 972
time step 4
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.006780192029574
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 972
time step 5
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0069591953853692
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 972
time step 6
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0069591953853692
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 972
time step 7
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901094114019413
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 972
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.710661176338014
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 972
time step 9
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 2.693455658688653
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 972
time step 10
Action 0
State (0, 0, 2, 1)
Rew

(0, 0, 2, 1)

Episode 975
time step 1
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0248560525972872
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 975
time step 2
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.989751439474027
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 975
time step 3
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901024856052598
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 975
time step 4
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0064684538462654
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 975
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.7454218668401382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 975
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.7454218668401382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 975
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0243380425916295
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode

(0, 0, 2, 0)

Episode 977
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.6947981441289772
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 977
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.694793081250418
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 977
time step 15
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 2.7631505572962833
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 977
time step 16
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 2.7631505572962833
Explore rate 1
Learning 1
Streaks 0

Episode 977 is done after time steps 16
(0, 0, 2, 1)

Episode 978
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 2.6777429339578127
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 978
time step 1
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.989773374064171
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 978
time step 2
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901022662593582
Explore rate 1


(0, 0, 3, 1)

Episode 980
time step 10
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 2.5957077279791076
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 980
time step 11
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.694793098980934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 980
time step 12
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.694793098980934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 980
time step 13
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.694793098980934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 980
time step 14
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.694793098980934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 980
time step 15
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.694793098980934
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 980
time step 16
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 2.678254081763015
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episo

(0, 0, 2, 0)

Episode 983
time step 7
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.1211953030014126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 983
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.1211953030014126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 983
time step 9
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.1211953030014126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 983
time step 10
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.1211953030014126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 983
time step 11
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.1211953030014126
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 983
time step 12
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 2.763150557475379
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 983
time step 13
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 2.763150557475379
Explore rate 1
Learning 1
Streaks 0

Episode 983 is don

(0, 0, 2, 0)

Episode 987
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901006354237121
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 987
time step 5
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9901006191815949
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 987
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900989938081839
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 987
time step 7
Action 0
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990102243392
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 987
time step 8
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990263139118
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 987
time step 9
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990276315054
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 987
time step 10
Action 1
State (0, 0, 1, 0)
Reward 1.0
Best Q 0.9900990276315054
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episo

(0, 0, 2, 0)

Episode 990
time step 4
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990111891299
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 990
time step 5
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990110412784
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 990
time step 6
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 0.9900990110427568
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 990
time step 7
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 1.007829525201202
Explore rate 1
Learning 1
Streaks 0

(0, 0, 1, 0)

Episode 990
time step 8
Action 0
State (0, 0, 1, 0)
Reward 1.0
Best Q 1.007829525201202
Explore rate 1
Learning 1
Streaks 0

Episode 990 is done after time steps 8
(0, 0, 2, 1)

Episode 991
time step 0
Action 1
State (0, 0, 2, 1)
Reward 1.0
Best Q 1.0298575448129192
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 0)

Episode 991
time step 1
Action 1
State (0, 0, 2, 0)
Reward 1.0
Best Q 1.0076522189079116
Explore rate 1
Learn

(0, 0, 3, 2)

Episode 993
time step 17
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901037158003034
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 993
time step 18
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901037162756696
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 993
time step 19
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901122057487086
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 993
time step 20
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901121208539785
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 993
time step 21
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990092059889
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 993
time step 22
Action 0
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990092059889
Explore rate 1
Learning 1
Streaks 0

(0, 0, 4, 2)

Episode 993
time step 23
Action 1
State (0, 0, 4, 2)
Reward 1.0
Best Q 0.9900990099079401
Explore rate 1
Learning 1
Streaks 0

Episode 993 i

(0, 0, 3, 1)

Episode 996
time step 6
Action 1
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901295959760918
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 996
time step 7
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 1.635051592924021
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 996
time step 8
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 1.6285888735343086
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 996
time step 9
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9772513918749446
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 0)

Episode 996
time step 10
Action 1
State (0, 0, 3, 0)
Reward 1.0
Best Q 0.9772513918749446
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 996
time step 11
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901295959760918
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Episode 996
time step 12
Action 0
State (0, 0, 3, 1)
Reward 1.0
Best Q 0.9901295959760918
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 1)

Epis

(0, 0, 2, 1)

Episode 998
time step 10
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9901030062152201
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 1)

Episode 998
time step 11
Action 0
State (0, 0, 2, 1)
Reward 1.0
Best Q 0.9900989699378477
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 998
time step 12
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9908842901485382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 998
time step 13
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9908842901485382
Explore rate 1
Learning 1
Streaks 0

(0, 0, 2, 2)

Episode 998
time step 14
Action 0
State (0, 0, 2, 2)
Reward 1.0
Best Q 0.9912803680045414
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 998
time step 15
Action 0
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901576761219231
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)

Episode 998
time step 16
Action 1
State (0, 0, 3, 2)
Reward 1.0
Best Q 0.9901576761219231
Explore rate 1
Learning 1
Streaks 0

(0, 0, 3, 2)


(0, 0, 4, 1)

Episode 999
time step 23
Action 1
State (0, 0, 4, 1)
Reward 1.0
Best Q 0.9900990084826198
Explore rate 1
Learning 1
Streaks 0

Episode 999 is done after time steps 23


In [47]:
env.close()

array([0., 0.])

In [53]:
q_table

array([0., 0.])