In [1]:
import tic_tac_toe as game
import random
import numpy as np
from collections import deque

from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD , Adam

Using TensorFlow backend.


In [2]:
N_ACTIONS = 36
GAMMA = 0.99 # decay rate of past observations
OBSERVATION = 3200. # timesteps to observe before training
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.1 # starting value of epsilon
REPLAY_MEMORY = 5000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
LEARNING_RATE = 1e-4

In [3]:
def build_model():
    model = Sequential()
    model.add(Dense(36, input_dim=36, activation='relu'))
    model.add(Dense(36, activation='relu'))
    model.add(Dense(36, activation='relu'))
    model.add(Dense(36, activation='relu'))
    model.add(Dense(36, activation='relu'))
    
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    return model

In [6]:
def train_network(model, args):
    game_state = game.ofttt()
    RM = deque()
    (x_t, r_0, terminal) = (game_state.initial, 0, False)
    s_t = np.array(x_t[1])
    #player2 = game.random_player(game_state, game_state.initial)
    if args == 'run':
        OBSERVE = 999999999    #Keep observe, never train
        epsilon = FINAL_EPSILON    #Use a small epsilon to choose mainly policy actions
        #Load model
        print ("Now we load weight")  
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")
    else:
        #Assign an observation variable and max epsilon to train
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON
        
    t = 0
    
    while(True and t < 10000):
        
        if terminal:
            print "------------------------------------------FINAL-----------------------------------------------"
            print "Reward:",r_t
            game_state.display(x_t)
            (x_t, r_0, terminal) = (game_state.initial, 0, False)
            #game_state = game_state.initial         
        
        #Initialize variables
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([N_ACTIONS])    #Output vector of actions a_[t] = 1 for action to take
        
        player = x_t[0]
        
        if player == 1:
            if random.random() <= epsilon:    #At the first move, choose randomly
                print("----------Random Action----------")
                action_index = random.randrange(N_ACTIONS)
                a_t[action_index] = 1
            else:
                q = model.predict(s_t.reshape(1,36))       #input the state at time t
                max_Q = np.argmax(q)         #Take the max q value predicted from network
                action_index = 1 + max_Q         #Assign action to the argmax Q
                a_t[max_Q] = 1               #Output vector a_t = 1 for max_Q

            #Decrease epsilon by a smalll factor
            if epsilon > FINAL_EPSILON and t > OBSERVE:               
                epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / 10000
            
            moves = game_state.legal_moves(x_t)
            if action_index not in moves:
                #print action_index, moves
                terminal = True
                r_t = -1
            else:
                x_t1, r_t, terminal = game_state.next_state(action_index, x_t) #run the selected action and observed next state and reward
            #print " 1:", r_t, action_index#, x_t[1]
            
            
        else:
            #Random agent
            moves = game_state.legal_moves(x_t)
            randomAction = moves[random.randrange(0, len(moves))]
            x_t1, r_t, terminal = game_state.next_state(randomAction, x_t)
            #print "-1:", r_t, randomAction#, x_t[1]            
            
        s_t1 = np.array(x_t1[1])
        RM.append((s_t, action_index, r_t, s_t1, terminal))    # store the transition in the Replay Memory
        if len(RM) > REPLAY_MEMORY:
            RM.popleft()

        #only train if done observing
        if t > OBSERVE:
            #sample a minibatch to train on
            minibatch = random.sample(RM, BATCH)
            inputs = np.zeros((BATCH, N_ACTIONS))  
            #print (inputs.shape)
            targets = np.zeros((inputs.shape[0], N_ACTIONS)) 

            #Now we do the experience replay
            for i in range(0, len(minibatch)):
                x0_t = np.array(minibatch[i][0])
                action_t = minibatch[i][1]   #This is action index
                reward_t = minibatch[i][2]
                state_t1 = minibatch[i][3]
                terminal = minibatch[i][4]
                # if terminated, only equals reward
                state_t = x0_t.reshape(1,N_ACTIONS)

                inputs[i:i + 1] = state_t    #I saved down s_t

                targets[i] = model.predict(state_t)  # Hitting each buttom probability
                Q_sa = model.predict(state_t1.reshape(1,N_ACTIONS))

                if terminal:
                    targets[i, action_t] = reward_t
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            # targets2 = normalize(targets)
            loss += model.train_on_batch(inputs, targets)

            
        moves = game_state.legal_moves(x_t)
        #print "Moves: ", moves      
        player = x_t1[0]        
        s_t = s_t1
        x_t = x_t1
        t = t + 1
        
        # save progress every 10000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            model.save_weights("model.h5", overwrite=True)

        # print info
        info = ""
        if t <= OBSERVE:
            info = "observe"
        else:
            info = "train"

        print("TIMESTEP", t, "/ STATE", info, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)
       
    print("Episode finished!")
    print("************************")

In [7]:
def playGame(args):
    model = build_model()
    train_network(model,args)

In [8]:
def main():
    #parser = argparse.ArgumentParser(description='Description of your program')
    #parser.add_argument('-m','--mode', help='Train / Run', required=True)
    #args = vars(parser.parse_args())
    playGame(args="train")

In [9]:
if __name__ == "__main__":
    main()

('TIMESTEP', 1, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 4, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 5, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 6, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 7, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  1  

 0  1  0  0 -1  1
 0  0  0  0  0  0
 0  0  0  0  0  1
-1  0 -1  0  0  0
('TIMESTEP', 164, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 165, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 166, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 167, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 168, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 169, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 170, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -

 0  0  0  0  0  0
 0  0  0  0  0  0
 1  0  0  0  0  0
('TIMESTEP', 307, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 308, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 309, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 310, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 311, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 312, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 313, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 19, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
-1  0  0  0  0  

 0  0  0  0  0  0
 0  1  0  0  0  1
 0  1  0  0 -1  0
 0 -1 -1  1  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 458, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 9, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 459, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 460, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 29, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 461, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 462, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 21, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 463, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 464, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 29, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL------

 1  0  0  0  0  0
('TIMESTEP', 614, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 29, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 615, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 616, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 617, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 618, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  1
 1  0  0  0  0  0
('TIMESTEP', 619, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 620, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ 

 0  0  0  0  0  1
 0  0  0  0  0  0
-1  0  0  1  1  0
 1  0 -1 -1  0  0
('TIMESTEP', 765, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 766, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 767, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 768, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 769, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 770, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 771, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -

('TIMESTEP', 924, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 925, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
-1  0  0  0  0 -1
 0  0  0  1  0  1
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  1  0  1
 0  0  0  0  0 -1
('TIMESTEP', 926, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 927, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 928, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 929, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 930, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REW

 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 993, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 994, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 995, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 996, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 997, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 12, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 998, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 999, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
Now we save model
('TIMESTEP', 1000, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, 

('TIMESTEP', 1129, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 14, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  1  0  0
 0  0  0  0  0  0
('TIMESTEP', 1130, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1131, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1132, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1133, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1134, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1135, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0,

 0  0  0  0 -1  0
 0  1  1  0  0  0
 1  1  0  1  0  0
 0 -1  0  0  0  0
 0 -1  0  1 -1 -1
 0  0  0  0 -1  0
('TIMESTEP', 1271, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1272, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1273, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1274, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1275, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  1  0  1  0  0
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 1276, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD

 0 -1  0  1  0  0
 1  0  0  0  0  0
('TIMESTEP', 1402, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1403, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1404, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 28, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1405, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1406, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1407, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1408, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0 -1  0  0  0 -1
 0  0  0 

-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  1  0  0
 1  0  0  0  0  0
('TIMESTEP', 1465, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1466, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 1467, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1468, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1469, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1470, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 1471, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
-----------

 0  0  0  0  0  1
 0  0  0  0  0  0
 0  0  0  1  0 -1
-1  0  0  0  0  0
('TIMESTEP', 1538, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1539, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1540, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1541, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1542, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1543, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1544, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Re

 0  0  0  0  0 -1
-1  0  0  1  0  0
 1  0  0  0  0  0
('TIMESTEP', 1628, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1629, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1630, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1631, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1632, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1633, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1634, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0 

 0  0  0  0  0 -1
 0  0  0  0  1  0
 1  0  1  0  0  0
('TIMESTEP', 1786, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1787, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 1788, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 9, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1789, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1790, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 12, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1791, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 1792, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 35, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1793, '/ STATE', 'observe', '/ EPSIL

 0  0  0  0  0  0
 1  0  0  0 -1  1
 1  0  0  0  0  0
 0  0  0 -1  0 -1
 1  0  0 -1  0  0
('TIMESTEP', 1855, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 12, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1856, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1857, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1858, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1859, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1860, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1861, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 12, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL--------------------------------

 1  0  0  0  0  0
('TIMESTEP', 1908, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1909, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1910, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1911, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1912, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1913, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 1914, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0 

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
 0  0  0 -1  1  0
 1  0  0  0  0  0
('TIMESTEP', 2109, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2110, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 2111, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 28, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2112, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2113, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2114, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2115, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2116, '/ STATE'

 0  0  0  0  0  0
-1  1  0  0  0  1
 0  1  0 -1  0  0
 0  0  0  1  0  0
 0 -1 -1  0  0  0
('TIMESTEP', 2230, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2231, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2232, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2233, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
----------Random Action----------
('TIMESTEP', 2234, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 14, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2235, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2236, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FIN

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
-1  0  0  1  0  0
 1  0  0  0  0  0
('TIMESTEP', 2297, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2298, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2299, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 28, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2300, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2301, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2302, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2303, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 12, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2304, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ 

 0  0 -1  0  0  0
('TIMESTEP', 2457, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2458, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2459, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2460, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2461, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  1  0 -1
 1  0  0  0  0  0
('TIMESTEP', 2462, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2463, '/ STATE', 'observe', '/ EPSILON', 0

('TIMESTEP', 2592, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2593, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2594, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0 -1  0  0
 0  0 -1  0  0  0
 0  0  0 -1  0  1
 0  0 -1  0  0  0
 0  0  0  1  0  0
 1  0  0  1  0  0
('TIMESTEP', 2595, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2596, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2597, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2598, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0,

 0  0  0  1 -1  1
 0  0  0  0  0  0
('TIMESTEP', 2739, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2740, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2741, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  1  0  0
 0  0  0  0  0  0
('TIMESTEP', 2742, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2743, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2744, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2745, '/ STATE', 'observ

 0 -1  0  1  0  0
 0  1 -1  0  0  1
 0  0  0 -1  0  0
 0  0  0  1  0  0
 1  0  0  0  0  0
('TIMESTEP', 2804, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 12, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2805, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2806, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2807, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2808, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2809, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2810, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL--------------------------------

('TIMESTEP', 2877, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2878, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2879, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2880, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2881, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 13, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2882, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 2883, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 19, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  1  0  0  0  0
 0  1 -1 

('TIMESTEP', 3021, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3022, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3023, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 28, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3024, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3025, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3026, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3027, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0 -1  0  1
 0  0  0 

 0  0  0  0  0  0
 0  1 -1  0  0  1
 0  0 -1  0  0  0
-1  0  0  0  0  0
 1  0  0  0  0  0
('TIMESTEP', 3175, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3176, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3177, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3178, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3179, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3180, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0, '/ Loss ', 0)
('TIMESTEP', 3181, '/ STATE', 'observe', '/ EPSILON', 0.1, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0, '/ Loss ', 0)
------------------------------------------FINAL--------------------------------

('TIMESTEP', 3230, '/ STATE', 'train', '/ EPSILON', 0.09980019999999998, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.26133662, '/ Loss ', 0.005048064049333334)
('TIMESTEP', 3231, '/ STATE', 'train', '/ EPSILON', 0.09979020999999998, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 0.047799807, '/ Loss ', 0.0059540634974837303)
('TIMESTEP', 3232, '/ STATE', 'train', '/ EPSILON', 0.09979020999999998, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.21089178, '/ Loss ', 0.0059125972911715508)
('TIMESTEP', 3233, '/ STATE', 'train', '/ EPSILON', 0.09978021999999998, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 0.15284181, '/ Loss ', 0.0049391202628612518)
('TIMESTEP', 3234, '/ STATE', 'train', '/ EPSILON', 0.09978021999999998, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.14159247, '/ Loss ', 0.0086174309253692627)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
-1  0  0  0  0  1
-1  1  0 -1  0  0
 0  1  0  0  0  0
 0  0  0  0  0  

('TIMESTEP', 3272, '/ STATE', 'train', '/ EPSILON', 0.09955044999999994, '/ ACTION', 27, '/ REWARD', -1, '/ Q_MAX ', 0.24595262, '/ Loss ', 0.0055589550174772739)
----------Random Action----------
('TIMESTEP', 3273, '/ STATE', 'train', '/ EPSILON', 0.09954045999999994, '/ ACTION', 2, '/ REWARD', -1, '/ Q_MAX ', 0.16643405, '/ Loss ', 0.0066489479504525661)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
-1  0 -1  0  1 -1
 0  0  0  0  0  0
 1  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  1  0  1
 1  0 -1  0  0  0
('TIMESTEP', 3274, '/ STATE', 'train', '/ EPSILON', 0.09953046999999994, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0.08066617, '/ Loss ', 0.0047510974109172821)
('TIMESTEP', 3275, '/ STATE', 'train', '/ EPSILON', 0.09953046999999994, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.098494425, '/ Loss ', 0.0040369839407503605)
('TIMESTEP', 3276, '/ STATE', 'train', '/ EPSILON', 0.09952047999999994, '/ ACTION', 21, '/ REWARD', 0,

('TIMESTEP', 3318, '/ STATE', 'train', '/ EPSILON', 0.0992507499999999, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.057093713, '/ Loss ', 0.0054863523691892624)
('TIMESTEP', 3319, '/ STATE', 'train', '/ EPSILON', 0.0992407599999999, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.17732328, '/ Loss ', 0.0047783800400793552)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
-1 -1  0  0  0  0
 0  0  0  0  0  1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  1  0
 1  0  0  0 -1  0
----------Random Action----------
('TIMESTEP', 3320, '/ STATE', 'train', '/ EPSILON', 0.0992307699999999, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 0.17396253, '/ Loss ', 0.0047370805405080318)
('TIMESTEP', 3321, '/ STATE', 'train', '/ EPSILON', 0.0992307699999999, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.14035478, '/ Loss ', 0.0062816152349114418)
('TIMESTEP', 3322, '/ STATE', 'train', '/ EPSILON', 0.0992207799999999, '/ ACTION', 30, '/ REWARD', 0, '/ 

('TIMESTEP', 3368, '/ STATE', 'train', '/ EPSILON', 0.09889110999999985, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.19817807, '/ Loss ', 0.0011164328316226602)
('TIMESTEP', 3369, '/ STATE', 'train', '/ EPSILON', 0.09888111999999985, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0.125109, '/ Loss ', 0.0062674377113580704)
('TIMESTEP', 3370, '/ STATE', 'train', '/ EPSILON', 0.09888111999999985, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.11183585, '/ Loss ', 0.0056513082236051559)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  1  0  0
 0  0  0  0  0  0
 1  0  0  0  0  0
('TIMESTEP', 3371, '/ STATE', 'train', '/ EPSILON', 0.09887112999999985, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 0.094147101, '/ Loss ', 0.0046704765409231186)
('TIMESTEP', 3372, '/ STATE', 'train', '/ EPSILON', 0.09887112999999985, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.26935714, '/ Loss ', 0.00

('TIMESTEP', 3415, '/ STATE', 'train', '/ EPSILON', 0.0985414599999998, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.089163348, '/ Loss ', 0.0012139988830313087)
('TIMESTEP', 3416, '/ STATE', 'train', '/ EPSILON', 0.0985314699999998, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.17181827, '/ Loss ', 0.0059436485171318054)
('TIMESTEP', 3417, '/ STATE', 'train', '/ EPSILON', 0.0985214799999998, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.12624425, '/ Loss ', 0.0080706290900707245)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0 -1  0  0  0  0
 0  0  0  1  0  0
 0  1  0  0 -1  0
 1  0  0  0  0  0
('TIMESTEP', 3418, '/ STATE', 'train', '/ EPSILON', 0.0985114899999998, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0.10100542, '/ Loss ', 0.0045267795212566853)
('TIMESTEP', 3419, '/ STATE', 'train', '/ EPSILON', 0.0985114899999998, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.093359448, '/ Loss ', 

('TIMESTEP', 3460, '/ STATE', 'train', '/ EPSILON', 0.09821178999999976, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.1277847, '/ Loss ', 0.0028778798878192902)
('TIMESTEP', 3461, '/ STATE', 'train', '/ EPSILON', 0.09820179999999976, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.10408769, '/ Loss ', 0.0047828289680182934)
('TIMESTEP', 3462, '/ STATE', 'train', '/ EPSILON', 0.09819180999999975, '/ ACTION', 30, '/ REWARD', -1, '/ Q_MAX ', 0.10227931, '/ Loss ', 0.0063070822507143021)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  1  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
('TIMESTEP', 3463, '/ STATE', 'train', '/ EPSILON', 0.09818181999999975, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0.076529473, '/ Loss ', 0.0072706965729594231)
('TIMESTEP', 3464, '/ STATE', 'train', '/ EPSILON', 0.09818181999999975, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.21213794, '/ Loss 

('TIMESTEP', 3499, '/ STATE', 'train', '/ EPSILON', 0.09798201999999973, '/ ACTION', 21, '/ REWARD', 0, '/ Q_MAX ', 0.19314846, '/ Loss ', 0.0011996031971648335)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 3500, '/ STATE', 'train', '/ EPSILON', 0.09797202999999972, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.1306963, '/ Loss ', 0.0055607110261917114)
('TIMESTEP', 3501, '/ STATE', 'train', '/ EPSILON', 0.09797202999999972, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.2280632, '/ Loss ', 0.0021181527990847826)
('TIMESTEP', 3502, '/ STATE', 'train', '/ EPSILON', 0.09796203999999972, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.13667306, '/ Loss ', 0.0048113185912370682)
('TIMESTEP', 3503, '/ STATE', 'train', '/ EPSILON', 0.09795204999999972, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.12928657, '/ Loss ', 0.00

('TIMESTEP', 3545, '/ STATE', 'train', '/ EPSILON', 0.09758241999999967, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.10603297, '/ Loss ', 0.0056245829910039902)
('TIMESTEP', 3546, '/ STATE', 'train', '/ EPSILON', 0.09757242999999967, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.082584947, '/ Loss ', 0.0063707530498504639)
('TIMESTEP', 3547, '/ STATE', 'train', '/ EPSILON', 0.09756243999999967, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.23199753, '/ Loss ', 0.003772469237446785)
('TIMESTEP', 3548, '/ STATE', 'train', '/ EPSILON', 0.09755244999999967, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.19805188, '/ Loss ', 0.0039208377711474895)
('TIMESTEP', 3549, '/ STATE', 'train', '/ EPSILON', 0.09754245999999966, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.1210942, '/ Loss ', 0.0022798583377152681)
('TIMESTEP', 3550, '/ STATE', 'train', '/ EPSILON', 0.09753246999999966, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.1312902, '/ Loss ', 0.0047318339347839355)
('TIMESTEP', 3551, '/ STATE', 

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
('TIMESTEP', 3582, '/ STATE', 'train', '/ EPSILON', 0.09729270999999963, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.12703282, '/ Loss ', 0.0065905135124921799)
('TIMESTEP', 3583, '/ STATE', 'train', '/ EPSILON', 0.09729270999999963, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.16589881, '/ Loss ', 0.0049215611070394516)
('TIMESTEP', 3584, '/ STATE', 'train', '/ EPSILON', 0.09728271999999963, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.1930034, '/ Loss ', 0.0065401559695601463)
----------Random Action----------
('TIMESTEP', 3585, '/ STATE', 'train', '/ EPSILON', 0.09727272999999963, '/ ACTION', 28, '/ REWARD', 0, '/ Q_MAX ', 0.33807451, '/ Loss ', 0.0074874358251690865)
('TIMESTEP', 3586, '/ STATE', 'train', '/ EPSILON', 0.09727272999999963, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.14581175, '/ Loss ', 0.0047330958768725395)
('TIMESTEP', 3587, '/ STATE', 'train', '/ EPSILON', 0.09726273999999963, '/ ACTION', 1, 

('TIMESTEP', 3629, '/ STATE', 'train', '/ EPSILON', 0.09697302999999959, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.20844696, '/ Loss ', 0.0056293751113116741)
('TIMESTEP', 3630, '/ STATE', 'train', '/ EPSILON', 0.09696303999999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.22071166, '/ Loss ', 0.0054809479042887688)
('TIMESTEP', 3631, '/ STATE', 'train', '/ EPSILON', 0.09695304999999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.21461757, '/ Loss ', 0.0065078944899141788)
('TIMESTEP', 3632, '/ STATE', 'train', '/ EPSILON', 0.09694305999999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.11479834, '/ Loss ', 0.0075786560773849487)
('TIMESTEP', 3633, '/ STATE', 'train', '/ EPSILON', 0.09693306999999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.16392632, '/ Loss ', 0.0047866767272353172)
('TIMESTEP', 3634, '/ STATE', 'train', '/ EPSILON', 0.09692307999999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.22972794, '/ Loss ', 0.0055738263763487339)
----------------------------

('TIMESTEP', 3676, '/ STATE', 'train', '/ EPSILON', 0.09658341999999953, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.21497522, '/ Loss ', 0.0057621849700808525)
('TIMESTEP', 3677, '/ STATE', 'train', '/ EPSILON', 0.09657342999999953, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.50147861, '/ Loss ', 0.0014380677603185177)
----------Random Action----------
('TIMESTEP', 3678, '/ STATE', 'train', '/ EPSILON', 0.09656343999999953, '/ ACTION', 23, '/ REWARD', 0, '/ Q_MAX ', 0.18950479, '/ Loss ', 0.003179966239258647)
('TIMESTEP', 3679, '/ STATE', 'train', '/ EPSILON', 0.09656343999999953, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.12987253, '/ Loss ', 0.0048724506050348282)
('TIMESTEP', 3680, '/ STATE', 'train', '/ EPSILON', 0.09655344999999953, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.24015217, '/ Loss ', 0.0050130682066082954)
('TIMESTEP', 3681, '/ STATE', 'train', '/ EPSILON', 0.09654345999999953, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.177976, '/ Loss ', 0.002870035357773304)

('TIMESTEP', 3717, '/ STATE', 'train', '/ EPSILON', 0.09626373999999949, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0.14342074, '/ Loss ', 0.0072511369362473488)
('TIMESTEP', 3718, '/ STATE', 'train', '/ EPSILON', 0.09626373999999949, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.14502972, '/ Loss ', 0.0097837578505277634)
('TIMESTEP', 3719, '/ STATE', 'train', '/ EPSILON', 0.09625374999999949, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.24155325, '/ Loss ', 0.0047238823026418686)
('TIMESTEP', 3720, '/ STATE', 'train', '/ EPSILON', 0.09625374999999949, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.24812704, '/ Loss ', 0.0060604671016335487)
('TIMESTEP', 3721, '/ STATE', 'train', '/ EPSILON', 0.09624375999999948, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.33619103, '/ Loss ', 0.0050220582634210587)
('TIMESTEP', 3722, '/ STATE', 'train', '/ EPSILON', 0.09623376999999948, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.48727316, '/ Loss ', 0.0038777440786361694)
('TIMESTEP', 3723, '/ STATE', '

('TIMESTEP', 3758, '/ STATE', 'train', '/ EPSILON', 0.09596403999999945, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.26855844, '/ Loss ', 0.0047526354901492596)
('TIMESTEP', 3759, '/ STATE', 'train', '/ EPSILON', 0.09595404999999944, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.1716958, '/ Loss ', 0.0040882276371121407)
('TIMESTEP', 3760, '/ STATE', 'train', '/ EPSILON', 0.09594405999999944, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.22388712, '/ Loss ', 0.0073151509277522564)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  1  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
('TIMESTEP', 3761, '/ STATE', 'train', '/ EPSILON', 0.09593406999999944, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.26072082, '/ Loss ', 0.0055497991852462292)
('TIMESTEP', 3762, '/ STATE', 'train', '/ EPSILON', 0.09593406999999944, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.20194329, '/ Loss ', 0.0

('TIMESTEP', 3799, '/ STATE', 'train', '/ EPSILON', 0.0956543499999994, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.21515077, '/ Loss ', 0.009170185774564743)
('TIMESTEP', 3800, '/ STATE', 'train', '/ EPSILON', 0.0956443599999994, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.36293873, '/ Loss ', 0.0051761409267783165)
----------Random Action----------
('TIMESTEP', 3801, '/ STATE', 'train', '/ EPSILON', 0.0956343699999994, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 0.19422078, '/ Loss ', 0.004064557608217001)
('TIMESTEP', 3802, '/ STATE', 'train', '/ EPSILON', 0.0956343699999994, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.16631725, '/ Loss ', 0.0032575463410466909)
('TIMESTEP', 3803, '/ STATE', 'train', '/ EPSILON', 0.0956243799999994, '/ ACTION', 30, '/ REWARD', 0, '/ Q_MAX ', 0.22685038, '/ Loss ', 0.0081405071541666985)
('TIMESTEP', 3804, '/ STATE', 'train', '/ EPSILON', 0.0956243799999994, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.38433036, '/ Loss ', 0.0084944013506174088)
-----

('TIMESTEP', 3841, '/ STATE', 'train', '/ EPSILON', 0.09532467999999936, '/ ACTION', 27, '/ REWARD', 0, '/ Q_MAX ', 0.17931782, '/ Loss ', 0.0074146403931081295)
('TIMESTEP', 3842, '/ STATE', 'train', '/ EPSILON', 0.09532467999999936, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.11639109, '/ Loss ', 0.0076554706320166588)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0 -1 -1  0  0
 0  0  0  0 -1  0
 0  0  0  1  0  0
 0  0  0  1  0  0
 0  0  0  0  0  0
('TIMESTEP', 3843, '/ STATE', 'train', '/ EPSILON', 0.09531468999999936, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.22106342, '/ Loss ', 0.010198401287198067)
('TIMESTEP', 3844, '/ STATE', 'train', '/ EPSILON', 0.09531468999999936, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.23347521, '/ Loss ', 0.0054054725915193558)
----------Random Action----------
('TIMESTEP', 3845, '/ STATE', 'train', '/ EPSILON', 0.09530469999999935, '/ ACTION', 6, '/ REWARD', 0, '/ Q_M

('TIMESTEP', 3886, '/ STATE', 'train', '/ EPSILON', 0.09498501999999931, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.35522354, '/ Loss ', 0.0059624616988003254)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 3887, '/ STATE', 'train', '/ EPSILON', 0.09497502999999931, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.32346201, '/ Loss ', 0.0075687933713197708)
('TIMESTEP', 3888, '/ STATE', 'train', '/ EPSILON', 0.09497502999999931, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.17815381, '/ Loss ', 0.0059239203110337257)
('TIMESTEP', 3889, '/ STATE', 'train', '/ EPSILON', 0.0949650399999993, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.15512042, '/ Loss ', 0.0050119860097765923)
('TIMESTEP', 3890, '/ STATE', 'train', '/ EPSILON', 0.0949550499999993, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.19173118, '/ Loss ', 0.004

('TIMESTEP', 3925, '/ STATE', 'train', '/ EPSILON', 0.09469530999999927, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.44998881, '/ Loss ', 0.004151645116508007)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 3926, '/ STATE', 'train', '/ EPSILON', 0.09468531999999927, '/ ACTION', 35, '/ REWARD', 0, '/ Q_MAX ', 0.21689434, '/ Loss ', 0.0059284009039402008)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  1
('TIMESTEP', 3927, '/ STATE', 'train', '/ EPSILON', 0.09467532999999927, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.34657967, '/ Loss ', 0.0088015925139188766)
('TIMESTEP', 3928, '/ STATE', 'train', '/ EPSILON', 0.094

('TIMESTEP', 3964, '/ STATE', 'train', '/ EPSILON', 0.09441558999999923, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.33300328, '/ Loss ', 0.0085424184799194336)
('TIMESTEP', 3965, '/ STATE', 'train', '/ EPSILON', 0.09440559999999923, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.20358101, '/ Loss ', 0.0059334654361009598)
('TIMESTEP', 3966, '/ STATE', 'train', '/ EPSILON', 0.09439560999999923, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.23210734, '/ Loss ', 0.0095294453203678131)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 3967, '/ STATE', 'train', '/ EPSILON', 0.09438561999999923, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.51656771, '/ Loss ', 0.0083501767367124557)
('TIMESTEP', 3968, '/ STATE', 'train', '/ EPSILON', 0.09438561999999923, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.4336074, '/ Loss ', 0.0

('TIMESTEP', 4009, '/ STATE', 'train', '/ EPSILON', 0.09404595999999918, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.33475697, '/ Loss ', 0.0046780062839388847)
----------Random Action----------
('TIMESTEP', 4010, '/ STATE', 'train', '/ EPSILON', 0.09403596999999918, '/ ACTION', 5, '/ REWARD', 0, '/ Q_MAX ', 0.41256762, '/ Loss ', 0.0086836926639080048)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  1  0  1
 0  0  0  0  0  0
-1  0  0  0  0  1
-1  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
('TIMESTEP', 4011, '/ STATE', 'train', '/ EPSILON', 0.09402597999999918, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.28874964, '/ Loss ', 0.0063329078257083893)
('TIMESTEP', 4012, '/ STATE', 'train', '/ EPSILON', 0.09402597999999918, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.26857448, '/ Loss ', 0.0039246706292033195)
('TIMESTEP', 4013, '/ STATE', 'train', '/ EPSILON', 0.09401598999999917, '/ ACTION', 1, '/ REWARD', -1, '/ Q

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4061, '/ STATE', 'train', '/ EPSILON', 0.09361638999999912, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.59784466, '/ Loss ', 0.009250326082110405)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4062, '/ STATE', 'train', '/ EPSILON', 0.09360639999999912, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.28647074, '/ Loss ', 0.005559554323554039)
('TIMESTEP', 4063, '/ STATE', 'train', '/ EPSILON', 0.09360639999999912, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.30034727, '/ Loss ', 0.0097874905914068222)
('TIMESTEP', 4064, '/ STATE', 'train', '/ EPSILON', 0.09359640999999912, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.51318616, '/ Loss ', 0.0073769069276750088)
('TIMESTEP', 4065, '/ STATE', 'train', '/ EPSILON', 0.09358641999999912, '/ ACTION', 1, '/

 0  0  0  0  0  0
('TIMESTEP', 4098, '/ STATE', 'train', '/ EPSILON', 0.09332667999999908, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.36779413, '/ Loss ', 0.0092162918299436569)
('TIMESTEP', 4099, '/ STATE', 'train', '/ EPSILON', 0.09332667999999908, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.36617556, '/ Loss ', 0.0089310780167579651)
('TIMESTEP', 4100, '/ STATE', 'train', '/ EPSILON', 0.09331668999999908, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.32727778, '/ Loss ', 0.0063732806593179703)
('TIMESTEP', 4101, '/ STATE', 'train', '/ EPSILON', 0.09330669999999908, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.46892536, '/ Loss ', 0.0057682418264448643)
('TIMESTEP', 4102, '/ STATE', 'train', '/ EPSILON', 0.09329670999999908, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.45843974, '/ Loss ', 0.007033398374915123)
('TIMESTEP', 4103, '/ STATE', 'train', '/ EPSILON', 0.09328671999999907, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.52526402, '/ Loss ', 0.0080185681581497192)
('TIMESTEP', 

('TIMESTEP', 4148, '/ STATE', 'train', '/ EPSILON', 0.09292707999999902, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.38615051, '/ Loss ', 0.0085011422634124756)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4149, '/ STATE', 'train', '/ EPSILON', 0.09291708999999902, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.36161381, '/ Loss ', 0.0090064313262701035)
('TIMESTEP', 4150, '/ STATE', 'train', '/ EPSILON', 0.09291708999999902, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.3305667, '/ Loss ', 0.010806608945131302)
('TIMESTEP', 4151, '/ STATE', 'train', '/ EPSILON', 0.09290709999999902, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.55285025, '/ Loss ', 0.0080434130504727364)
('TIMESTEP', 4152, '/ STATE', 'train', '/ EPSILON', 0.09289710999999902, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.44655091, '/ Loss ', 0.007

('TIMESTEP', 4190, '/ STATE', 'train', '/ EPSILON', 0.09258741999999898, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.50133264, '/ Loss ', 0.0083571616560220718)
('TIMESTEP', 4191, '/ STATE', 'train', '/ EPSILON', 0.09257742999999898, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.67970502, '/ Loss ', 0.0085924835875630379)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4192, '/ STATE', 'train', '/ EPSILON', 0.09256743999999897, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.26215953, '/ Loss ', 0.0058207442052662373)
('TIMESTEP', 4193, '/ STATE', 'train', '/ EPSILON', 0.09256743999999897, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.5393976, '/ Loss ', 0.0082140360027551651)
('TIMESTEP', 4194, '/ STATE', 'train', '/ EPSILON', 0.09255744999999897, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.54449844, '/ Loss ', 0.

('TIMESTEP', 4236, '/ STATE', 'train', '/ EPSILON', 0.09220779999999892, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.47215936, '/ Loss ', 0.0065057361498475075)
('TIMESTEP', 4237, '/ STATE', 'train', '/ EPSILON', 0.09219780999999892, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.42894614, '/ Loss ', 0.0062430910766124725)
----------Random Action----------
('TIMESTEP', 4238, '/ STATE', 'train', '/ EPSILON', 0.09218781999999892, '/ ACTION', 18, '/ REWARD', 0, '/ Q_MAX ', 0.50254619, '/ Loss ', 0.0059870625846087933)
('TIMESTEP', 4239, '/ STATE', 'train', '/ EPSILON', 0.09218781999999892, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.41583622, '/ Loss ', 0.0064799832180142403)
('TIMESTEP', 4240, '/ STATE', 'train', '/ EPSILON', 0.09217782999999892, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.59517658, '/ Loss ', 0.0095149585977196693)
('TIMESTEP', 4241, '/ STATE', 'train', '/ EPSILON', 0.09216783999999892, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.53716213, '/ Loss ', 0.0088080987334251

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4277, '/ STATE', 'train', '/ EPSILON', 0.09189810999999888, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.88787711, '/ Loss ', 0.011241871863603592)
('TIMESTEP', 4278, '/ STATE', 'train', '/ EPSILON', 0.09189810999999888, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.29220262, '/ Loss ', 0.010274821892380714)
('TIMESTEP', 4279, '/ STATE', 'train', '/ EPSILON', 0.09188811999999888, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.32503062, '/ Loss ', 0.011355625465512276)
('TIMESTEP', 4280, '/ STATE', 'train', '/ EPSILON', 0.09187812999999888, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.32620278, '/ Loss ', 0.0051697399467229843)
('TIMESTEP', 4281, '/ STATE', 'train', '/ EPSILON', 0.09186813999999888, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.57273257, '/ Loss ', 0.0063191866502165794)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0

('TIMESTEP', 4316, '/ STATE', 'train', '/ EPSILON', 0.09161838999999884, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1.0456227, '/ Loss ', 0.013343510217964649)
('TIMESTEP', 4317, '/ STATE', 'train', '/ EPSILON', 0.09160839999999884, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.52584815, '/ Loss ', 0.010980335995554924)
('TIMESTEP', 4318, '/ STATE', 'train', '/ EPSILON', 0.09159840999999884, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.89605045, '/ Loss ', 0.0078150536864995956)
('TIMESTEP', 4319, '/ STATE', 'train', '/ EPSILON', 0.09158841999999884, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.59543908, '/ Loss ', 0.0086057838052511215)
('TIMESTEP', 4320, '/ STATE', 'train', '/ EPSILON', 0.09157842999999884, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.40044263, '/ Loss ', 0.0079742129892110825)
----------Random Action----------
('TIMESTEP', 4321, '/ STATE', 'train', '/ EPSILON', 0.09156843999999884, '/ ACTION', 32, '/ REWARD', 0, '/ Q_MAX ', 0.63756722, '/ Loss ', 0.004624210298061370

('TIMESTEP', 4357, '/ STATE', 'train', '/ EPSILON', 0.0913186899999988, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.57344538, '/ Loss ', 0.01102965883910656)
('TIMESTEP', 4358, '/ STATE', 'train', '/ EPSILON', 0.0913086999999988, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.56014621, '/ Loss ', 0.013915908522903919)
('TIMESTEP', 4359, '/ STATE', 'train', '/ EPSILON', 0.0912987099999988, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.76559919, '/ Loss ', 0.0099743641912937164)
('TIMESTEP', 4360, '/ STATE', 'train', '/ EPSILON', 0.0912887199999988, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.68614966, '/ Loss ', 0.012273220345377922)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4361, '/ STATE', 'train', '/ EPSILON', 0.0912787299999988, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.41075283, '/ Loss ', 0.0080769

('TIMESTEP', 4403, '/ STATE', 'train', '/ EPSILON', 0.09094905999999875, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.6432938, '/ Loss ', 0.011932912282645702)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4404, '/ STATE', 'train', '/ EPSILON', 0.09093906999999875, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.60123122, '/ Loss ', 0.012688233517110348)
('TIMESTEP', 4405, '/ STATE', 'train', '/ EPSILON', 0.09093906999999875, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.60360754, '/ Loss ', 0.012844124808907509)
('TIMESTEP', 4406, '/ STATE', 'train', '/ EPSILON', 0.09092907999999875, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.61727417, '/ Loss ', 0.012893315404653549)
('TIMESTEP', 4407, '/ STATE', 'train', '/ EPSILON', 0.09091908999999875, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.61977726, '/ Loss ', 0.0118

('TIMESTEP', 4451, '/ STATE', 'train', '/ EPSILON', 0.0905594499999987, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.7662518, '/ Loss ', 0.019896760582923889)
('TIMESTEP', 4452, '/ STATE', 'train', '/ EPSILON', 0.0905494599999987, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1.2541312, '/ Loss ', 0.014162050560116768)
('TIMESTEP', 4453, '/ STATE', 'train', '/ EPSILON', 0.09053946999999869, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.901232, '/ Loss ', 0.014876833185553551)
('TIMESTEP', 4454, '/ STATE', 'train', '/ EPSILON', 0.09052947999999869, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.85365701, '/ Loss ', 0.011914479546248913)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4455, '/ STATE', 'train', '/ EPSILON', 0.09051948999999869, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 0.67609245, '/ Loss ', 0.01279272

('TIMESTEP', 4494, '/ STATE', 'train', '/ EPSILON', 0.09022977999999865, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.80708963, '/ Loss ', 0.014927477575838566)
('TIMESTEP', 4495, '/ STATE', 'train', '/ EPSILON', 0.09021978999999865, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.96680629, '/ Loss ', 0.01659725233912468)
('TIMESTEP', 4496, '/ STATE', 'train', '/ EPSILON', 0.09020979999999865, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 0.90319908, '/ Loss ', 0.01516313012689352)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 4497, '/ STATE', 'train', '/ EPSILON', 0.09019980999999865, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1.0923834, '/ Loss ', 0.015275707468390465)
('TIMESTEP', 4498, '/ STATE', 'train', '/ EPSILON', 0.09019980999999865, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX 

('TIMESTEP', 4539, '/ STATE', 'train', '/ EPSILON', 0.0898501599999986, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 0.86493939, '/ Loss ', 0.020939476788043976)
----------Random Action----------
('TIMESTEP', 4540, '/ STATE', 'train', '/ EPSILON', 0.0898401699999986, '/ ACTION', 5, '/ REWARD', 0, '/ Q_MAX ', 1.5871059, '/ Loss ', 0.022717375308275223)
('TIMESTEP', 4541, '/ STATE', 'train', '/ EPSILON', 0.0898401699999986, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1.3028712, '/ Loss ', 0.015022038482129574)
('TIMESTEP', 4542, '/ STATE', 'train', '/ EPSILON', 0.0898301799999986, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1.6480451, '/ Loss ', 0.01764066144824028)
('TIMESTEP', 4543, '/ STATE', 'train', '/ EPSILON', 0.08982018999999859, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1.6110046, '/ Loss ', 0.023633606731891632)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
-1  1  0 -1  0  1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  

('TIMESTEP', 4586, '/ STATE', 'train', '/ EPSILON', 0.08944056999999854, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 3.258867, '/ Loss ', 0.028248298913240433)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4587, '/ STATE', 'train', '/ EPSILON', 0.08943057999999854, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1.559593, '/ Loss ', 0.030751390382647514)
('TIMESTEP', 4588, '/ STATE', 'train', '/ EPSILON', 0.08943057999999854, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 2.0034602, '/ Loss ', 0.035092204809188843)
('TIMESTEP', 4589, '/ STATE', 'train', '/ EPSILON', 0.08942058999999854, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2.1529884, '/ Loss ', 0.041085422039031982)
('TIMESTEP', 4590, '/ STATE', 'train', '/ EPSILON', 0.08941059999999854, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2.0703526, '/ Loss ', 0.024514898657

('TIMESTEP', 4632, '/ STATE', 'train', '/ EPSILON', 0.08906094999999849, '/ ACTION', 22, '/ REWARD', 0, '/ Q_MAX ', 2.9392371, '/ Loss ', 0.05167117714881897)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  1  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
('TIMESTEP', 4633, '/ STATE', 'train', '/ EPSILON', 0.08905095999999849, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 2.3561463, '/ Loss ', 0.055725365877151489)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4634, '/ STATE', 'train', '/ EPSILON', 0.08904096999999848, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 2.3670306, '/ Loss ', 0.05010329931974411)
('TIMESTEP', 4635, '/ STATE', 'train', '/ EPSILON', 0.08904096999999848, '/ ACTION', 0, '/ REWARD'

('TIMESTEP', 4668, '/ STATE', 'train', '/ EPSILON', 0.08879121999999845, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 4.6920233, '/ Loss ', 0.094148695468902588)
('TIMESTEP', 4669, '/ STATE', 'train', '/ EPSILON', 0.08878122999999845, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1.8764094, '/ Loss ', 0.053455889225006104)
----------Random Action----------
('TIMESTEP', 4670, '/ STATE', 'train', '/ EPSILON', 0.08877123999999845, '/ ACTION', 34, '/ REWARD', 0, '/ Q_MAX ', 4.6935992, '/ Loss ', 0.073531880974769592)
('TIMESTEP', 4671, '/ STATE', 'train', '/ EPSILON', 0.08877123999999845, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 2.7241006, '/ Loss ', 0.038760021328926086)
('TIMESTEP', 4672, '/ STATE', 'train', '/ EPSILON', 0.08876124999999845, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 3.8754232, '/ Loss ', 0.082965873181819916)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  

('TIMESTEP', 4713, '/ STATE', 'train', '/ EPSILON', 0.0884515599999984, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 4.6740217, '/ Loss ', 0.11900103092193604)
('TIMESTEP', 4714, '/ STATE', 'train', '/ EPSILON', 0.0884415699999984, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 6.5104294, '/ Loss ', 0.22008699178695679)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4715, '/ STATE', 'train', '/ EPSILON', 0.0884315799999984, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 3.6446345, '/ Loss ', 0.16488507390022278)
('TIMESTEP', 4716, '/ STATE', 'train', '/ EPSILON', 0.0884315799999984, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 4.1407247, '/ Loss ', 0.12889330089092255)
('TIMESTEP', 4717, '/ STATE', 'train', '/ EPSILON', 0.0884215899999984, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 3.9029508, '/ Loss ', 0.16468381881713867

('TIMESTEP', 4759, '/ STATE', 'train', '/ EPSILON', 0.08809191999999835, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 5.6637878, '/ Loss ', 0.35538005828857422)
('TIMESTEP', 4760, '/ STATE', 'train', '/ EPSILON', 0.08808192999999835, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 7.2373977, '/ Loss ', 0.24554482102394104)
('TIMESTEP', 4761, '/ STATE', 'train', '/ EPSILON', 0.08807193999999835, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 10.169436, '/ Loss ', 0.39392176270484924)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  1
 0 -1 -1  0  0  0
 1  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4762, '/ STATE', 'train', '/ EPSILON', 0.08806194999999835, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 4.9659481, '/ Loss ', 0.40324026346206665)
('TIMESTEP', 4763, '/ STATE', 'train', '/ EPSILON', 0.08806194999999835, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 6.9623528, '/ Loss ', 0.376441895961

('TIMESTEP', 4808, '/ STATE', 'train', '/ EPSILON', 0.0877023099999983, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 11.053466, '/ Loss ', 0.61736208200454712)
('TIMESTEP', 4809, '/ STATE', 'train', '/ EPSILON', 0.0877023099999983, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 11.035553, '/ Loss ', 0.92827504873275757)
('TIMESTEP', 4810, '/ STATE', 'train', '/ EPSILON', 0.0876923199999983, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 16.463682, '/ Loss ', 0.7918086051940918)
('TIMESTEP', 4811, '/ STATE', 'train', '/ EPSILON', 0.0876823299999983, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 10.630121, '/ Loss ', 1.3563637733459473)
('TIMESTEP', 4812, '/ STATE', 'train', '/ EPSILON', 0.0876723399999983, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 6.791646, '/ Loss ', 1.1915991306304932)
('TIMESTEP', 4813, '/ STATE', 'train', '/ EPSILON', 0.08766234999999829, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 12.375728, '/ Loss ', 1.1050758361816406)
('TIMESTEP', 4814, '/ STATE', 'train', '/ EPSILON', 0.0876

('TIMESTEP', 4851, '/ STATE', 'train', '/ EPSILON', 0.08738262999999825, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 16.286808, '/ Loss ', 2.6256709098815918)
('TIMESTEP', 4852, '/ STATE', 'train', '/ EPSILON', 0.08738262999999825, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 21.492844, '/ Loss ', 2.5070528984069824)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4853, '/ STATE', 'train', '/ EPSILON', 0.08737263999999825, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 17.664751, '/ Loss ', 2.5736479759216309)
('TIMESTEP', 4854, '/ STATE', 'train', '/ EPSILON', 0.08737263999999825, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 17.281639, '/ Loss ', 2.3474383354187012)
('TIMESTEP', 4855, '/ STATE', 'train', '/ EPSILON', 0.08736264999999825, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 16.46843, '/ Loss ', 3.3154549598693848)
--

('TIMESTEP', 4895, '/ STATE', 'train', '/ EPSILON', 0.08705295999999821, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 28.520132, '/ Loss ', 5.2873544692993164)
('TIMESTEP', 4896, '/ STATE', 'train', '/ EPSILON', 0.08704296999999821, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 16.273569, '/ Loss ', 4.1482105255126953)
('TIMESTEP', 4897, '/ STATE', 'train', '/ EPSILON', 0.0870329799999982, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 25.261461, '/ Loss ', 1.8492667675018311)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4898, '/ STATE', 'train', '/ EPSILON', 0.0870229899999982, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 26.048792, '/ Loss ', 4.9164667129516602)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  

('TIMESTEP', 4936, '/ STATE', 'train', '/ EPSILON', 0.08672328999999816, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 50.047531, '/ Loss ', 11.198675155639648)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4937, '/ STATE', 'train', '/ EPSILON', 0.08671329999999816, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 35.509323, '/ Loss ', 10.849930763244629)
('TIMESTEP', 4938, '/ STATE', 'train', '/ EPSILON', 0.08671329999999816, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 42.569054, '/ Loss ', 8.5905895233154297)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4939, '/ STATE', 'train', '/ EPSILON', 0.08670330999999816, '/ ACTION', 1, '/ REWARD', 0

('TIMESTEP', 4980, '/ STATE', 'train', '/ EPSILON', 0.08639361999999812, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 40.520416, '/ Loss ', 22.656814575195312)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 4981, '/ STATE', 'train', '/ EPSILON', 0.08638362999999812, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 36.664318, '/ Loss ', 23.80732536315918)
('TIMESTEP', 4982, '/ STATE', 'train', '/ EPSILON', 0.08638362999999812, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 56.144318, '/ Loss ', 28.937545776367188)
('TIMESTEP', 4983, '/ STATE', 'train', '/ EPSILON', 0.08637363999999811, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 61.90086, '/ Loss ', 24.551990509033203)
('TIMESTEP', 4984, '/ STATE', 'train', '/ EPSILON', 0.08636364999999811, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 83.779381, '/ Loss ', 19.285182952880859)
('

('TIMESTEP', 5016, '/ STATE', 'train', '/ EPSILON', 0.08611389999999808, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 69.677483, '/ Loss ', 42.537956237792969)
('TIMESTEP', 5017, '/ STATE', 'train', '/ EPSILON', 0.08610390999999808, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 55.710751, '/ Loss ', 39.210319519042969)
('TIMESTEP', 5018, '/ STATE', 'train', '/ EPSILON', 0.08609391999999808, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 90.153419, '/ Loss ', 36.257877349853516)
('TIMESTEP', 5019, '/ STATE', 'train', '/ EPSILON', 0.08608392999999807, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 64.786537, '/ Loss ', 38.886558532714844)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5020, '/ STATE', 'train', '/ EPSILON', 0.08607393999999807, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 83.572876, '/ Loss ', 52.522514343261719

('TIMESTEP', 5061, '/ STATE', 'train', '/ EPSILON', 0.08577423999999803, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 91.141708, '/ Loss ', 73.617607116699219)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
('TIMESTEP', 5062, '/ STATE', 'train', '/ EPSILON', 0.08576424999999803, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 89.399483, '/ Loss ', 112.75037384033203)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5063, '/ STATE', 'train', '/ EPSILON', 0.08575425999999803, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 76.808136, '/ Loss ', 97.369102478027344)
('TIMESTEP', 5064, '/ STATE', 'train', '/ EPSILON', 0.08575425999999803, '/ ACTION', 0, '/ REWARD', 0

('TIMESTEP', 5106, '/ STATE', 'train', '/ EPSILON', 0.08542458999999798, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 157.04803, '/ Loss ', 138.54884338378906)
('TIMESTEP', 5107, '/ STATE', 'train', '/ EPSILON', 0.08542458999999798, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 176.07199, '/ Loss ', 135.3004150390625)
----------Random Action----------
('TIMESTEP', 5108, '/ STATE', 'train', '/ EPSILON', 0.08541459999999798, '/ ACTION', 10, '/ REWARD', 0, '/ Q_MAX ', 126.8135, '/ Loss ', 146.80694580078125)
('TIMESTEP', 5109, '/ STATE', 'train', '/ EPSILON', 0.08541459999999798, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 233.73648, '/ Loss ', 126.04955291748047)
('TIMESTEP', 5110, '/ STATE', 'train', '/ EPSILON', 0.08540460999999798, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 144.62469, '/ Loss ', 179.86492919921875)
('TIMESTEP', 5111, '/ STATE', 'train', '/ EPSILON', 0.08539461999999798, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 128.03848, '/ Loss ', 112.85166931152344)
('TIMESTEP', 5112, '/ S

('TIMESTEP', 5146, '/ STATE', 'train', '/ EPSILON', 0.08512488999999794, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 101.47807, '/ Loss ', 157.24543762207031)
('TIMESTEP', 5147, '/ STATE', 'train', '/ EPSILON', 0.08511489999999794, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 231.71164, '/ Loss ', 194.53176879882812)
('TIMESTEP', 5148, '/ STATE', 'train', '/ EPSILON', 0.08510490999999794, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 187.95248, '/ Loss ', 182.89630126953125)
('TIMESTEP', 5149, '/ STATE', 'train', '/ EPSILON', 0.08509491999999794, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 268.19531, '/ Loss ', 292.769287109375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
('TIMESTEP', 5150, '/ STATE', 'train', '/ EPSILON', 0.08508492999999794, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 121.26345, '/ Loss ', 186.91937255859375)


('TIMESTEP', 5190, '/ STATE', 'train', '/ EPSILON', 0.08476524999999789, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 211.34236, '/ Loss ', 418.43124389648438)
('TIMESTEP', 5191, '/ STATE', 'train', '/ EPSILON', 0.08476524999999789, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 242.71016, '/ Loss ', 299.30905151367188)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5192, '/ STATE', 'train', '/ EPSILON', 0.08475525999999789, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 139.31029, '/ Loss ', 450.2325439453125)
('TIMESTEP', 5193, '/ STATE', 'train', '/ EPSILON', 0.08475525999999789, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 199.94145, '/ Loss ', 464.0616455078125)
('TIMESTEP', 5194, '/ STATE', 'train', '/ EPSILON', 0.08474526999999789, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 172.86459, '/ Loss ', 343.58514404296875)
---

('TIMESTEP', 5237, '/ STATE', 'train', '/ EPSILON', 0.08440560999999784, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 273.04343, '/ Loss ', 878.6231689453125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  1  0  0  0
('TIMESTEP', 5238, '/ STATE', 'train', '/ EPSILON', 0.08439561999999784, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 418.43793, '/ Loss ', 1040.722900390625)
('TIMESTEP', 5239, '/ STATE', 'train', '/ EPSILON', 0.08439561999999784, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 266.5199, '/ Loss ', 695.514404296875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5240, '/ STATE', 'train', '/ EPSILON', 0.08438562999999784, '/ ACTION', 1, '/ REWARD', 0, '/ 

('TIMESTEP', 5275, '/ STATE', 'train', '/ EPSILON', 0.0841158999999978, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 322.31497, '/ Loss ', 673.8258056640625)
('TIMESTEP', 5276, '/ STATE', 'train', '/ EPSILON', 0.0841059099999978, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 369.11603, '/ Loss ', 1113.27197265625)
----------Random Action----------
('TIMESTEP', 5277, '/ STATE', 'train', '/ EPSILON', 0.0840959199999978, '/ ACTION', 14, '/ REWARD', 0, '/ Q_MAX ', 308.9451, '/ Loss ', 1038.6298828125)
('TIMESTEP', 5278, '/ STATE', 'train', '/ EPSILON', 0.0840959199999978, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 258.94128, '/ Loss ', 738.5037841796875)
('TIMESTEP', 5279, '/ STATE', 'train', '/ EPSILON', 0.0840859299999978, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 234.64171, '/ Loss ', 943.0643310546875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  

('TIMESTEP', 5316, '/ STATE', 'train', '/ EPSILON', 0.08380620999999776, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 547.05109, '/ Loss ', 780.33544921875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5317, '/ STATE', 'train', '/ EPSILON', 0.08379621999999776, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 367.06763, '/ Loss ', 1856.978271484375)
('TIMESTEP', 5318, '/ STATE', 'train', '/ EPSILON', 0.08379621999999776, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 475.90817, '/ Loss ', 1439.799560546875)
('TIMESTEP', 5319, '/ STATE', 'train', '/ EPSILON', 0.08378622999999776, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 613.40991, '/ Loss ', 1334.204345703125)
('TIMESTEP', 5320, '/ STATE', 'train', '/ EPSILON', 0.08377623999999775, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 390.1897, '/ Loss ', 1148.09228515625)
('TIMESTE

('TIMESTEP', 5354, '/ STATE', 'train', '/ EPSILON', 0.08349651999999771, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 812.9599, '/ Loss ', 1330.77490234375)
('TIMESTEP', 5355, '/ STATE', 'train', '/ EPSILON', 0.08348652999999771, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 646.11774, '/ Loss ', 788.35748291015625)
('TIMESTEP', 5356, '/ STATE', 'train', '/ EPSILON', 0.08347653999999771, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 446.66919, '/ Loss ', 1578.54736328125)
('TIMESTEP', 5357, '/ STATE', 'train', '/ EPSILON', 0.08346654999999771, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 467.96243, '/ Loss ', 1832.34130859375)
('TIMESTEP', 5358, '/ STATE', 'train', '/ EPSILON', 0.08345655999999771, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 369.18893, '/ Loss ', 1003.9951171875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIME

('TIMESTEP', 5400, '/ STATE', 'train', '/ EPSILON', 0.08311689999999766, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 553.36597, '/ Loss ', 2813.208984375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
('TIMESTEP', 5401, '/ STATE', 'train', '/ EPSILON', 0.08310690999999766, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 536.53571, '/ Loss ', 2363.3310546875)
('TIMESTEP', 5402, '/ STATE', 'train', '/ EPSILON', 0.08310690999999766, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 539.43616, '/ Loss ', 1762.4217529296875)
----------Random Action----------
('TIMESTEP', 5403, '/ STATE', 'train', '/ EPSILON', 0.08309691999999766, '/ ACTION', 28, '/ REWARD', 0, '/ Q_MAX ', 604.10504, '/ Loss ', 2459.95068359375)
('TIMESTEP', 5404, '/ STATE', 'train', '/ EPSILON', 0.08309691999999766, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 514.46857, '/ Loss

('TIMESTEP', 5435, '/ STATE', 'train', '/ EPSILON', 0.08287713999999763, '/ ACTION', 14, '/ REWARD', 0, '/ Q_MAX ', 695.36749, '/ Loss ', 3232.55615234375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5436, '/ STATE', 'train', '/ EPSILON', 0.08286714999999763, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 615.17249, '/ Loss ', 2993.39501953125)
('TIMESTEP', 5437, '/ STATE', 'train', '/ EPSILON', 0.08286714999999763, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 562.42328, '/ Loss ', 6518.16552734375)
('TIMESTEP', 5438, '/ STATE', 'train', '/ EPSILON', 0.08285715999999763, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 618.93524, '/ Loss ', 5894.7001953125)
('TIMESTEP', 5439, '/ STATE', 'train', '/ EPSILON', 0.08284716999999762, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 693.55157, '/ Loss ', 3789.69921875)
('TIMESTEP', 

('TIMESTEP', 5483, '/ STATE', 'train', '/ EPSILON', 0.08247753999999757, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 950.7406, '/ Loss ', 4824.36181640625)
----------Random Action----------
('TIMESTEP', 5484, '/ STATE', 'train', '/ EPSILON', 0.08246754999999757, '/ ACTION', 15, '/ REWARD', 0, '/ Q_MAX ', 739.5426, '/ Loss ', 2680.345458984375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  1 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5485, '/ STATE', 'train', '/ EPSILON', 0.08245755999999757, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 564.31024, '/ Loss ', 3524.689208984375)
('TIMESTEP', 5486, '/ STATE', 'train', '/ EPSILON', 0.08245755999999757, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 845.81982, '/ Loss ', 4618.306640625)
('TIMESTEP', 5487, '/ STATE', 'train', '/ EPSILON', 0.08244756999999757, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 705.33209, '/ Los

('TIMESTEP', 5523, '/ STATE', 'train', '/ EPSILON', 0.08217783999999753, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 598.47656, '/ Loss ', 6453.3359375)
('TIMESTEP', 5524, '/ STATE', 'train', '/ EPSILON', 0.08216784999999753, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 842.20245, '/ Loss ', 5313.181640625)
('TIMESTEP', 5525, '/ STATE', 'train', '/ EPSILON', 0.08216784999999753, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 845.01489, '/ Loss ', 3967.563232421875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0 -1  0
 0  0  0  0  0  1
('TIMESTEP', 5526, '/ STATE', 'train', '/ EPSILON', 0.08215785999999753, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 630.78943, '/ Loss ', 4500.55517578125)
('TIMESTEP', 5527, '/ STATE', 'train', '/ EPSILON', 0.08215785999999753, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 622.67151, '/ Loss ', 5875.2236328125)
('TIMESTEP', 5528,

('TIMESTEP', 5564, '/ STATE', 'train', '/ EPSILON', 0.0819081099999975, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 906.94183, '/ Loss ', 5041.1904296875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5565, '/ STATE', 'train', '/ EPSILON', 0.08189811999999749, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 858.00671, '/ Loss ', 8236.546875)
('TIMESTEP', 5566, '/ STATE', 'train', '/ EPSILON', 0.08189811999999749, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 480.62503, '/ Loss ', 5904.0322265625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
('TIMESTEP', 5567, '/ STATE', 'train', '/ EPSILON', 0.08188812999999749, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 

('TIMESTEP', 5605, '/ STATE', 'train', '/ EPSILON', 0.08161839999999745, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 934.36621, '/ Loss ', 5624.291015625)
('TIMESTEP', 5606, '/ STATE', 'train', '/ EPSILON', 0.08160840999999745, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 793.73486, '/ Loss ', 9466.38671875)
('TIMESTEP', 5607, '/ STATE', 'train', '/ EPSILON', 0.08159841999999745, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 905.44427, '/ Loss ', 7070.138671875)
('TIMESTEP', 5608, '/ STATE', 'train', '/ EPSILON', 0.08158842999999745, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1324.207, '/ Loss ', 5195.0556640625)
----------Random Action----------
('TIMESTEP', 5609, '/ STATE', 'train', '/ EPSILON', 0.08157843999999745, '/ ACTION', 26, '/ REWARD', 0, '/ Q_MAX ', 866.48615, '/ Loss ', 5256.974609375)
('TIMESTEP', 5610, '/ STATE', 'train', '/ EPSILON', 0.08157843999999745, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 990.16107, '/ Loss ', 10700.666015625)
('TIMESTEP', 5611, '/ STATE', 'train', '/ E

('TIMESTEP', 5657, '/ STATE', 'train', '/ EPSILON', 0.0812088099999974, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1411.1675, '/ Loss ', 8281.349609375)
('TIMESTEP', 5658, '/ STATE', 'train', '/ EPSILON', 0.0812088099999974, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1178.7649, '/ Loss ', 12088.7265625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
('TIMESTEP', 5659, '/ STATE', 'train', '/ EPSILON', 0.0811988199999974, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1391.9341, '/ Loss ', 11002.009765625)
('TIMESTEP', 5660, '/ STATE', 'train', '/ EPSILON', 0.0811988199999974, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1110.2996, '/ Loss ', 8815.990234375)
('TIMESTEP', 5661, '/ STATE', 'train', '/ EPSILON', 0.0811888299999974, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1885.265, '/ Loss ', 4833.78466796875)
-------------------------

('TIMESTEP', 5698, '/ STATE', 'train', '/ EPSILON', 0.08090910999999736, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 937.35541, '/ Loss ', 7701.56103515625)
('TIMESTEP', 5699, '/ STATE', 'train', '/ EPSILON', 0.08090910999999736, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1174.0846, '/ Loss ', 5813.31640625)
('TIMESTEP', 5700, '/ STATE', 'train', '/ EPSILON', 0.08089911999999735, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1832.2842, '/ Loss ', 12000.0068359375)
('TIMESTEP', 5701, '/ STATE', 'train', '/ EPSILON', 0.08088912999999735, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1311.6296, '/ Loss ', 11432.666015625)
----------Random Action----------
('TIMESTEP', 5702, '/ STATE', 'train', '/ EPSILON', 0.08087913999999735, '/ ACTION', 8, '/ REWARD', 0, '/ Q_MAX ', 1017.2201, '/ Loss ', 11598.84765625)
('TIMESTEP', 5703, '/ STATE', 'train', '/ EPSILON', 0.08087913999999735, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1765.3906, '/ Loss ', 4362.982421875)
('TIMESTEP', 5704, '/ STATE', 'train', '/ 

('TIMESTEP', 5738, '/ STATE', 'train', '/ EPSILON', 0.08060940999999731, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1243.5802, '/ Loss ', 6719.158203125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5739, '/ STATE', 'train', '/ EPSILON', 0.08059941999999731, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1098.1788, '/ Loss ', 14676.162109375)
('TIMESTEP', 5740, '/ STATE', 'train', '/ EPSILON', 0.08059941999999731, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 970.44495, '/ Loss ', 11124.0322265625)
('TIMESTEP', 5741, '/ STATE', 'train', '/ EPSILON', 0.08058942999999731, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1345.7482, '/ Loss ', 14288.310546875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  

('TIMESTEP', 5774, '/ STATE', 'train', '/ EPSILON', 0.08035965999999728, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 2153.4097, '/ Loss ', 19708.7578125)
('TIMESTEP', 5775, '/ STATE', 'train', '/ EPSILON', 0.08035965999999728, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1390.3046, '/ Loss ', 15256.1953125)
('TIMESTEP', 5776, '/ STATE', 'train', '/ EPSILON', 0.08034966999999728, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 919.00757, '/ Loss ', 6349.0654296875)
('TIMESTEP', 5777, '/ STATE', 'train', '/ EPSILON', 0.08033967999999728, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1688.6948, '/ Loss ', 22046.9765625)
('TIMESTEP', 5778, '/ STATE', 'train', '/ EPSILON', 0.08032968999999728, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1221.0361, '/ Loss ', 24783.90234375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5779, '

('TIMESTEP', 5811, '/ STATE', 'train', '/ EPSILON', 0.08009991999999724, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1290.7026, '/ Loss ', 18313.01953125)
('TIMESTEP', 5812, '/ STATE', 'train', '/ EPSILON', 0.08009991999999724, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 627.01453, '/ Loss ', 10371.044921875)
('TIMESTEP', 5813, '/ STATE', 'train', '/ EPSILON', 0.08008992999999724, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1322.9114, '/ Loss ', 11895.3515625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 1  0  0  0  0 -1
('TIMESTEP', 5814, '/ STATE', 'train', '/ EPSILON', 0.08007993999999724, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1559.9974, '/ Loss ', 13604.7490234375)
('TIMESTEP', 5815, '/ STATE', 'train', '/ EPSILON', 0.08007993999999724, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1610.3788, '/ Loss ', 12638.439453125)
('TIMESTEP', 5816

('TIMESTEP', 5845, '/ STATE', 'train', '/ EPSILON', 0.07986015999999721, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1304.4241, '/ Loss ', 7963.74462890625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5846, '/ STATE', 'train', '/ EPSILON', 0.07985016999999721, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1203.9412, '/ Loss ', 10122.06640625)
('TIMESTEP', 5847, '/ STATE', 'train', '/ EPSILON', 0.07985016999999721, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1492.5183, '/ Loss ', 10250.6875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
('TIMESTEP', 5848, '/ STATE', 'train', '/ EPSILON', 0.07984017999999721, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 

('TIMESTEP', 5883, '/ STATE', 'train', '/ EPSILON', 0.07958043999999717, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1373.1659, '/ Loss ', 14215.23828125)
('TIMESTEP', 5884, '/ STATE', 'train', '/ EPSILON', 0.07957044999999717, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1185.931, '/ Loss ', 16562.0234375)
('TIMESTEP', 5885, '/ STATE', 'train', '/ EPSILON', 0.07956045999999717, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1338.2701, '/ Loss ', 18679.9453125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5886, '/ STATE', 'train', '/ EPSILON', 0.07955046999999717, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1213.8945, '/ Loss ', 6252.46142578125)
('TIMESTEP', 5887, '/ STATE', 'train', '/ EPSILON', 0.07955046999999717, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1777.9675, '/ Loss ', 19680.033203125)
('TIMESTEP', 5888,

('TIMESTEP', 5925, '/ STATE', 'train', '/ EPSILON', 0.07925076999999713, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1124.5048, '/ Loss ', 17456.8515625)
('TIMESTEP', 5926, '/ STATE', 'train', '/ EPSILON', 0.07925076999999713, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1357.2604, '/ Loss ', 18151.21875)
('TIMESTEP', 5927, '/ STATE', 'train', '/ EPSILON', 0.07924077999999712, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1636.322, '/ Loss ', 13165.63671875)
('TIMESTEP', 5928, '/ STATE', 'train', '/ EPSILON', 0.07923078999999712, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1171.7084, '/ Loss ', 10917.41796875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 5929, '/ STATE', 'train', '/ EPSILON', 0.07922079999999712, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1286.6116, '/ Loss ', 15068.0322265625)
('TIMESTEP', 5930, '/ 

('TIMESTEP', 5965, '/ STATE', 'train', '/ EPSILON', 0.07895106999999708, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1467.546, '/ Loss ', 16044.7177734375)
('TIMESTEP', 5966, '/ STATE', 'train', '/ EPSILON', 0.07895106999999708, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1196.2491, '/ Loss ', 12630.9140625)
('TIMESTEP', 5967, '/ STATE', 'train', '/ EPSILON', 0.07894107999999708, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1670.8899, '/ Loss ', 22592.92578125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
('TIMESTEP', 5968, '/ STATE', 'train', '/ EPSILON', 0.07893108999999708, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1594.0232, '/ Loss ', 15893.138671875)
('TIMESTEP', 5969, '/ STATE', 'train', '/ EPSILON', 0.07893108999999708, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1484.9257, '/ Loss ', 27406.171875)
---------------------

('TIMESTEP', 6003, '/ STATE', 'train', '/ EPSILON', 0.07866135999999704, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1476.8116, '/ Loss ', 11272.65234375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6004, '/ STATE', 'train', '/ EPSILON', 0.07865136999999704, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1741.2267, '/ Loss ', 11728.08984375)
('TIMESTEP', 6005, '/ STATE', 'train', '/ EPSILON', 0.07865136999999704, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1571.0922, '/ Loss ', 8620.34375)
('TIMESTEP', 6006, '/ STATE', 'train', '/ EPSILON', 0.07864137999999704, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1686.1217, '/ Loss ', 20555.328125)
('TIMESTEP', 6007, '/ STATE', 'train', '/ EPSILON', 0.07863138999999704, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2233.593, '/ Loss ', 12323.4619140625)
('TIMESTEP', 6008, '/ STA

('TIMESTEP', 6045, '/ STATE', 'train', '/ EPSILON', 0.078341679999997, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1864.2538, '/ Loss ', 25090.333984375)
('TIMESTEP', 6046, '/ STATE', 'train', '/ EPSILON', 0.078331689999997, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2061.0212, '/ Loss ', 9524.24609375)
('TIMESTEP', 6047, '/ STATE', 'train', '/ EPSILON', 0.078321699999997, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1398.9611, '/ Loss ', 23491.76171875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  1  0  0  0
 0  0  0 -1  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6048, '/ STATE', 'train', '/ EPSILON', 0.078311709999997, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1648.0314, '/ Loss ', 17946.9453125)
('TIMESTEP', 6049, '/ STATE', 'train', '/ EPSILON', 0.078311709999997, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1578.9568, '/ Loss ', 14707.0517578125)
('TIMESTEP', 6050, '/ STATE

 0  0  0  0  0  0
('TIMESTEP', 6093, '/ STATE', 'train', '/ EPSILON', 0.07797204999999695, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1213.2281, '/ Loss ', 20888.453125)
('TIMESTEP', 6094, '/ STATE', 'train', '/ EPSILON', 0.07797204999999695, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1402.7931, '/ Loss ', 27493.17578125)
('TIMESTEP', 6095, '/ STATE', 'train', '/ EPSILON', 0.07796205999999695, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2167.6448, '/ Loss ', 24266.42578125)
('TIMESTEP', 6096, '/ STATE', 'train', '/ EPSILON', 0.07795206999999695, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1186.0858, '/ Loss ', 11581.75)
('TIMESTEP', 6097, '/ STATE', 'train', '/ EPSILON', 0.07794207999999694, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2150.8411, '/ Loss ', 4409.62255859375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIME

('TIMESTEP', 6139, '/ STATE', 'train', '/ EPSILON', 0.0776124099999969, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1694.019, '/ Loss ', 20919.11328125)
('TIMESTEP', 6140, '/ STATE', 'train', '/ EPSILON', 0.0776124099999969, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1693.3627, '/ Loss ', 9074.240234375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6141, '/ STATE', 'train', '/ EPSILON', 0.0776024199999969, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1570.3275, '/ Loss ', 19417.25)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6142, '/ STATE', 'train', '/ EPSILON', 0.0775924299999969, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1690.9335, 

('TIMESTEP', 6177, '/ STATE', 'train', '/ EPSILON', 0.07734267999999686, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1661.4202, '/ Loss ', 14651.8984375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6178, '/ STATE', 'train', '/ EPSILON', 0.07733268999999686, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 2173.1453, '/ Loss ', 14766.884765625)
('TIMESTEP', 6179, '/ STATE', 'train', '/ EPSILON', 0.07733268999999686, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1656.5444, '/ Loss ', 22337.27734375)
('TIMESTEP', 6180, '/ STATE', 'train', '/ EPSILON', 0.07732269999999686, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2070.9783, '/ Loss ', 17936.28515625)
('TIMESTEP', 6181, '/ STATE', 'train', '/ EPSILON', 0.07731270999999686, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1589.5959, '/ Loss ', 15594.396484375)
('TIMESTEP', 6182, 

('TIMESTEP', 6223, '/ STATE', 'train', '/ EPSILON', 0.07700301999999681, '/ ACTION', 14, '/ REWARD', 0, '/ Q_MAX ', 2211.3879, '/ Loss ', 21355.4921875)
('TIMESTEP', 6224, '/ STATE', 'train', '/ EPSILON', 0.07700301999999681, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 2237.0671, '/ Loss ', 15923.2626953125)
('TIMESTEP', 6225, '/ STATE', 'train', '/ EPSILON', 0.07699302999999681, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1232.7377, '/ Loss ', 18329.40234375)
('TIMESTEP', 6226, '/ STATE', 'train', '/ EPSILON', 0.07698303999999681, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1579.9137, '/ Loss ', 21207.82421875)
('TIMESTEP', 6227, '/ STATE', 'train', '/ EPSILON', 0.07697304999999681, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1580.1776, '/ Loss ', 13781.359375)
('TIMESTEP', 6228, '/ STATE', 'train', '/ EPSILON', 0.07696305999999681, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1580.6576, '/ Loss ', 20245.3984375)
('TIMESTEP', 6229, '/ STATE', 'train', '/ EPSILON', 0.0769530699999968, '/ ACTI

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6266, '/ STATE', 'train', '/ EPSILON', 0.07665336999999676, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1631.7058, '/ Loss ', 15480.6171875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6267, '/ STATE', 'train', '/ EPSILON', 0.07664337999999676, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1607.5979, '/ Loss ', 25100.791015625)
('TIMESTEP', 6268, '/ STATE', 'train', '/ EPSILON', 0.07664337999999676, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 596.15344, '/ Loss ', 17828.7734375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6269, '/ STAT

('TIMESTEP', 6299, '/ STATE', 'train', '/ EPSILON', 0.07640361999999673, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1281.9531, '/ Loss ', 12163.92578125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
----------Random Action----------
('TIMESTEP', 6300, '/ STATE', 'train', '/ EPSILON', 0.07639362999999673, '/ ACTION', 6, '/ REWARD', 0, '/ Q_MAX ', 1617.7354, '/ Loss ', 9743.208984375)
('TIMESTEP', 6301, '/ STATE', 'train', '/ EPSILON', 0.07639362999999673, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1497.567, '/ Loss ', 13040.1328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 1  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6302, '/ STATE', 'train', '/ EPSILON', 0.07638363999999673, '/ ACTION'

('TIMESTEP', 6335, '/ STATE', 'train', '/ EPSILON', 0.07613388999999669, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1128.8461, '/ Loss ', 18258.162109375)
('TIMESTEP', 6336, '/ STATE', 'train', '/ EPSILON', 0.07612389999999669, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1741.8037, '/ Loss ', 22623.505859375)
('TIMESTEP', 6337, '/ STATE', 'train', '/ EPSILON', 0.07611390999999669, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1400.6493, '/ Loss ', 29480.458984375)
('TIMESTEP', 6338, '/ STATE', 'train', '/ EPSILON', 0.07610391999999669, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1480.5726, '/ Loss ', 16377.8984375)
('TIMESTEP', 6339, '/ STATE', 'train', '/ EPSILON', 0.07609392999999669, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1635.9442, '/ Loss ', 11541.173828125)
('TIMESTEP', 6340, '/ STATE', 'train', '/ EPSILON', 0.07608393999999669, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1327.3344, '/ Loss ', 23415.494140625)
------------------------------------------FINAL--------------------------

('TIMESTEP', 6382, '/ STATE', 'train', '/ EPSILON', 0.07577424999999664, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1787.7858, '/ Loss ', 24808.796875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  1
('TIMESTEP', 6383, '/ STATE', 'train', '/ EPSILON', 0.07576425999999664, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1262.2001, '/ Loss ', 27398.02734375)
('TIMESTEP', 6384, '/ STATE', 'train', '/ EPSILON', 0.07576425999999664, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 2312.0784, '/ Loss ', 16255.41796875)
('TIMESTEP', 6385, '/ STATE', 'train', '/ EPSILON', 0.07575426999999664, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1641.8031, '/ Loss ', 9629.58203125)
('TIMESTEP', 6386, '/ STATE', 'train', '/ EPSILON', 0.07574427999999664, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1516.4437, '/ Loss ', 18089.36328125)
('TIMESTEP', 6387, '/

('TIMESTEP', 6418, '/ STATE', 'train', '/ EPSILON', 0.0754945299999966, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 2033.8147, '/ Loss ', 10442.876953125)
('TIMESTEP', 6419, '/ STATE', 'train', '/ EPSILON', 0.0754945299999966, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1196.1586, '/ Loss ', 16327.744140625)
('TIMESTEP', 6420, '/ STATE', 'train', '/ EPSILON', 0.0754845399999966, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1742.1963, '/ Loss ', 18672.759765625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
('TIMESTEP', 6421, '/ STATE', 'train', '/ EPSILON', 0.0754745499999966, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1658.5963, '/ Loss ', 23177.00390625)
('TIMESTEP', 6422, '/ STATE', 'train', '/ EPSILON', 0.0754745499999966, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1641.9923, '/ Loss ', 27144.609375)
------------------------

 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
('TIMESTEP', 6459, '/ STATE', 'train', '/ EPSILON', 0.07520481999999656, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1767.2301, '/ Loss ', 14067.5390625)
('TIMESTEP', 6460, '/ STATE', 'train', '/ EPSILON', 0.07520481999999656, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1207.8927, '/ Loss ', 11401.69921875)
----------Random Action----------
('TIMESTEP', 6461, '/ STATE', 'train', '/ EPSILON', 0.07519482999999656, '/ ACTION', 33, '/ REWARD', 0, '/ Q_MAX ', 1676.3676, '/ Loss ', 27859.27734375)
('TIMESTEP', 6462, '/ STATE', 'train', '/ EPSILON', 0.07519482999999656, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 915.09973, '/ Loss ', 16880.13671875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0 -1  0  1  0  0
('TIMESTEP', 6463, '/ STATE', 'train', '/ EPSILON',

('TIMESTEP', 6495, '/ STATE', 'train', '/ EPSILON', 0.07496505999999653, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1680.5077, '/ Loss ', 22285.580078125)
('TIMESTEP', 6496, '/ STATE', 'train', '/ EPSILON', 0.07495506999999653, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1143.4546, '/ Loss ', 16622.908203125)
('TIMESTEP', 6497, '/ STATE', 'train', '/ EPSILON', 0.07494507999999653, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1634.4138, '/ Loss ', 17677.869140625)
('TIMESTEP', 6498, '/ STATE', 'train', '/ EPSILON', 0.07493508999999653, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1959.2358, '/ Loss ', 16231.576171875)
('TIMESTEP', 6499, '/ STATE', 'train', '/ EPSILON', 0.07492509999999653, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1793.7216, '/ Loss ', 22231.359375)
----------Random Action----------
('TIMESTEP', 6500, '/ STATE', 'train', '/ EPSILON', 0.07491510999999652, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 1300.0203, '/ Loss ', 19285.05859375)
-----------------------------------------

('TIMESTEP', 6544, '/ STATE', 'train', '/ EPSILON', 0.07457544999999648, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1625.103, '/ Loss ', 12069.1708984375)
('TIMESTEP', 6545, '/ STATE', 'train', '/ EPSILON', 0.07457544999999648, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1650.9417, '/ Loss ', 10195.009765625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
----------Random Action----------
('TIMESTEP', 6546, '/ STATE', 'train', '/ EPSILON', 0.07456545999999648, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 1780.2202, '/ Loss ', 12700.5400390625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6547, '/ STATE', 'train', '/ EPSILON', 0.07455546999999647, '/ 

('TIMESTEP', 6579, '/ STATE', 'train', '/ EPSILON', 0.07431570999999644, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1290.41, '/ Loss ', 11119.4765625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
('TIMESTEP', 6580, '/ STATE', 'train', '/ EPSILON', 0.07430571999999644, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1655.6704, '/ Loss ', 16931.861328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6581, '/ STATE', 'train', '/ EPSILON', 0.07429572999999644, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1165.6613, '/ Loss ', 12073.544921875)
('TIMESTEP', 6582, '/ STATE', 'train', '/ EPSILON', 0.07429572999999644, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1

('TIMESTEP', 6618, '/ STATE', 'train', '/ EPSILON', 0.0740259999999964, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1671.1373, '/ Loss ', 10517.5654296875)
('TIMESTEP', 6619, '/ STATE', 'train', '/ EPSILON', 0.0740160099999964, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 822.13818, '/ Loss ', 12889.396484375)
('TIMESTEP', 6620, '/ STATE', 'train', '/ EPSILON', 0.0740060199999964, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2026.7292, '/ Loss ', 13896.671875)
('TIMESTEP', 6621, '/ STATE', 'train', '/ EPSILON', 0.0739960299999964, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1399.1605, '/ Loss ', 12865.2724609375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0 -1  0
 0  0 -1  0  0  0
 0  0  0  0  0  0
 1  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6622, '/ STATE', 'train', '/ EPSILON', 0.0739860399999964, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1925.8979, '/ Loss ', 24575.80078125)
('TIMESTEP', 6623, 

('TIMESTEP', 6662, '/ STATE', 'train', '/ EPSILON', 0.07371630999999636, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1985.4874, '/ Loss ', 22831.61328125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6663, '/ STATE', 'train', '/ EPSILON', 0.07370631999999636, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 2421.7893, '/ Loss ', 13351.2890625)
('TIMESTEP', 6664, '/ STATE', 'train', '/ EPSILON', 0.07370631999999636, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1610.9358, '/ Loss ', 15575.3115234375)
('TIMESTEP', 6665, '/ STATE', 'train', '/ EPSILON', 0.07369632999999635, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1226.1185, '/ Loss ', 12637.09765625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0

('TIMESTEP', 6697, '/ STATE', 'train', '/ EPSILON', 0.07344657999999632, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1563.7777, '/ Loss ', 18607.0390625)
('TIMESTEP', 6698, '/ STATE', 'train', '/ EPSILON', 0.07344657999999632, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1616.3187, '/ Loss ', 14934.6376953125)
('TIMESTEP', 6699, '/ STATE', 'train', '/ EPSILON', 0.07343658999999632, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1696.0886, '/ Loss ', 17033.453125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
('TIMESTEP', 6700, '/ STATE', 'train', '/ EPSILON', 0.07342659999999632, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1529.9498, '/ Loss ', 21823.076171875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0

('TIMESTEP', 6738, '/ STATE', 'train', '/ EPSILON', 0.07314687999999628, '/ ACTION', 21, '/ REWARD', 0, '/ Q_MAX ', 2239.6614, '/ Loss ', 15983.423828125)
('TIMESTEP', 6739, '/ STATE', 'train', '/ EPSILON', 0.07314687999999628, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 2230.7537, '/ Loss ', 17807.35546875)
('TIMESTEP', 6740, '/ STATE', 'train', '/ EPSILON', 0.07313688999999628, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1660.6104, '/ Loss ', 13110.384765625)
('TIMESTEP', 6741, '/ STATE', 'train', '/ EPSILON', 0.07313688999999628, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1521.7067, '/ Loss ', 17125.98828125)
('TIMESTEP', 6742, '/ STATE', 'train', '/ EPSILON', 0.07312689999999628, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1876.0201, '/ Loss ', 9288.9951171875)
('TIMESTEP', 6743, '/ STATE', 'train', '/ EPSILON', 0.07311690999999627, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1189.7662, '/ Loss ', 10068.169921875)
('TIMESTEP', 6744, '/ STATE', 'train', '/ EPSILON', 0.07310691999999627, '/

('TIMESTEP', 6772, '/ STATE', 'train', '/ EPSILON', 0.07291710999999625, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1611.356, '/ Loss ', 11477.8251953125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6773, '/ STATE', 'train', '/ EPSILON', 0.07290711999999624, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1674.2158, '/ Loss ', 11408.1591796875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6774, '/ STATE', 'train', '/ EPSILON', 0.07289712999999624, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1882.4609, '/ Loss ', 22748.466796875)
('TIMESTEP', 6775, '/ STATE', 'train', '/ EPSILON', 0.07289712999999624, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX

('TIMESTEP', 6809, '/ STATE', 'train', '/ EPSILON', 0.07263738999999621, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1520.8008, '/ Loss ', 11495.279296875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6810, '/ STATE', 'train', '/ EPSILON', 0.0726273999999962, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1550.62, '/ Loss ', 16680.49609375)
('TIMESTEP', 6811, '/ STATE', 'train', '/ EPSILON', 0.0726273999999962, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1143.51, '/ Loss ', 12638.537109375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6812, '/ STATE', 'train', '/ EPSILON', 0.0726174099999962, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1550.

('TIMESTEP', 6847, '/ STATE', 'train', '/ EPSILON', 0.07237764999999617, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1545.5143, '/ Loss ', 13911.7119140625)
('TIMESTEP', 6848, '/ STATE', 'train', '/ EPSILON', 0.07236765999999617, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1545.0417, '/ Loss ', 13247.6796875)
('TIMESTEP', 6849, '/ STATE', 'train', '/ EPSILON', 0.07235766999999617, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1408.9646, '/ Loss ', 10164.1962890625)
('TIMESTEP', 6850, '/ STATE', 'train', '/ EPSILON', 0.07234767999999617, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2151.5876, '/ Loss ', 16334.2568359375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
('TIMESTEP', 6851, '/ STATE', 'train', '/ EPSILON', 0.07233768999999617, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1760.0269, '/ Loss ', 14589.7333984375)
-----------

('TIMESTEP', 6886, '/ STATE', 'train', '/ EPSILON', 0.07207794999999613, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1034.6826, '/ Loss ', 11723.55078125)
('TIMESTEP', 6887, '/ STATE', 'train', '/ EPSILON', 0.07206795999999613, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2263.6169, '/ Loss ', 13633.72265625)
('TIMESTEP', 6888, '/ STATE', 'train', '/ EPSILON', 0.07205796999999613, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1489.4717, '/ Loss ', 14749.0234375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6889, '/ STATE', 'train', '/ EPSILON', 0.07204797999999613, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1423.5392, '/ Loss ', 12557.59375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  

('TIMESTEP', 6925, '/ STATE', 'train', '/ EPSILON', 0.07175826999999609, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1560.1688, '/ Loss ', 13988.0244140625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6926, '/ STATE', 'train', '/ EPSILON', 0.07174827999999608, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1544.8223, '/ Loss ', 11808.021484375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 6927, '/ STATE', 'train', '/ EPSILON', 0.07173828999999608, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1560.2428, '/ Loss ', 16431.6875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0

('TIMESTEP', 6961, '/ STATE', 'train', '/ EPSILON', 0.07151850999999605, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1445.0796, '/ Loss ', 9305.64453125)
----------Random Action----------
('TIMESTEP', 6962, '/ STATE', 'train', '/ EPSILON', 0.07150851999999605, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 1444.3802, '/ Loss ', 7761.22265625)
('TIMESTEP', 6963, '/ STATE', 'train', '/ EPSILON', 0.07150851999999605, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1543.7865, '/ Loss ', 13242.447265625)
('TIMESTEP', 6964, '/ STATE', 'train', '/ EPSILON', 0.07149852999999605, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 2178.8965, '/ Loss ', 8690.416015625)
('TIMESTEP', 6965, '/ STATE', 'train', '/ EPSILON', 0.07148853999999605, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1512.406, '/ Loss ', 16254.572265625)
('TIMESTEP', 6966, '/ STATE', 'train', '/ EPSILON', 0.07147854999999605, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1441.6857, '/ Loss ', 10198.486328125)
('TIMESTEP', 6967, '/ STATE', 'train', '/ E

Now we save model
('TIMESTEP', 7000, '/ STATE', 'train', '/ EPSILON', 0.07122879999999601, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 2162.5557, '/ Loss ', 14750.9619140625)
----------Random Action----------
('TIMESTEP', 7001, '/ STATE', 'train', '/ EPSILON', 0.07121880999999601, '/ ACTION', 6, '/ REWARD', 0, '/ Q_MAX ', 1122.3656, '/ Loss ', 11265.1201171875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
('TIMESTEP', 7002, '/ STATE', 'train', '/ EPSILON', 0.07120881999999601, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1477.8856, '/ Loss ', 17583.42578125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7003, '/ STATE', 'train', '/ EPSILON', 0.07119

('TIMESTEP', 7037, '/ STATE', 'train', '/ EPSILON', 0.07093908999999597, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1392.2727, '/ Loss ', 12140.8408203125)
('TIMESTEP', 7038, '/ STATE', 'train', '/ EPSILON', 0.07092909999999597, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1281.853, '/ Loss ', 14364.689453125)
('TIMESTEP', 7039, '/ STATE', 'train', '/ EPSILON', 0.07091910999999597, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 830.62646, '/ Loss ', 13504.3740234375)
----------Random Action----------
('TIMESTEP', 7040, '/ STATE', 'train', '/ EPSILON', 0.07090911999999597, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1503.0913, '/ Loss ', 10212.5869140625)
('TIMESTEP', 7041, '/ STATE', 'train', '/ EPSILON', 0.07090911999999597, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1390.9163, '/ Loss ', 13427.859375)
('TIMESTEP', 7042, '/ STATE', 'train', '/ EPSILON', 0.07089912999999597, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1414.4341, '/ Loss ', 11232.029296875)
---------------------------------------

('TIMESTEP', 7076, '/ STATE', 'train', '/ EPSILON', 0.07066935999999593, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1450.8123, '/ Loss ', 9048.6572265625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7077, '/ STATE', 'train', '/ EPSILON', 0.07065936999999593, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1071.4055, '/ Loss ', 11695.4287109375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7078, '/ STATE', 'train', '/ EPSILON', 0.07064937999999593, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1157.7668, '/ Loss ', 10514.724609375)
('TIMESTEP', 7079, '/ STATE', 'train', '/ EPSILON', 0.07064937999999593, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX

('TIMESTEP', 7115, '/ STATE', 'train', '/ EPSILON', 0.0703796499999959, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1440.8125, '/ Loss ', 10066.748046875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
('TIMESTEP', 7116, '/ STATE', 'train', '/ EPSILON', 0.07036965999999589, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 989.23419, '/ Loss ', 9633.5244140625)
('TIMESTEP', 7117, '/ STATE', 'train', '/ EPSILON', 0.07036965999999589, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1219.2671, '/ Loss ', 9249.724609375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7118, '/ STATE', 'train', '/ EPSILON', 0.07035966999999589, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ',

('TIMESTEP', 7152, '/ STATE', 'train', '/ EPSILON', 0.07009992999999586, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1753.4801, '/ Loss ', 10012.3486328125)
('TIMESTEP', 7153, '/ STATE', 'train', '/ EPSILON', 0.07008993999999585, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 902.58582, '/ Loss ', 6830.8134765625)
('TIMESTEP', 7154, '/ STATE', 'train', '/ EPSILON', 0.07007994999999585, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1431.4083, '/ Loss ', 12432.1611328125)
('TIMESTEP', 7155, '/ STATE', 'train', '/ EPSILON', 0.07006995999999585, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1106.3717, '/ Loss ', 12881.92578125)
('TIMESTEP', 7156, '/ STATE', 'train', '/ EPSILON', 0.07005996999999585, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1338.4172, '/ Loss ', 9544.767578125)
('TIMESTEP', 7157, '/ STATE', 'train', '/ EPSILON', 0.07004997999999585, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1451.7708, '/ Loss ', 8541.84375)
------------------------------------------FINAL----------------------------

('TIMESTEP', 7196, '/ STATE', 'train', '/ EPSILON', 0.06976026999999581, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1315.7069, '/ Loss ', 7320.99169921875)
('TIMESTEP', 7197, '/ STATE', 'train', '/ EPSILON', 0.0697502799999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1315.2899, '/ Loss ', 7860.21435546875)
('TIMESTEP', 7198, '/ STATE', 'train', '/ EPSILON', 0.0697402899999958, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1377.9801, '/ Loss ', 9375.373046875)
----------Random Action----------
('TIMESTEP', 7199, '/ STATE', 'train', '/ EPSILON', 0.0697302999999958, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 1454.7831, '/ Loss ', 9477.373046875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7200, '/ STATE', 'train', '/ EPSILON', 0.0697203099999958, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1392.8163, '/ Loss ', 

('TIMESTEP', 7242, '/ STATE', 'train', '/ EPSILON', 0.06941061999999576, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1300.0789, '/ Loss ', 12387.822265625)
('TIMESTEP', 7243, '/ STATE', 'train', '/ EPSILON', 0.06940062999999576, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1299.4756, '/ Loss ', 11629.2099609375)
('TIMESTEP', 7244, '/ STATE', 'train', '/ EPSILON', 0.06939063999999576, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1450.4047, '/ Loss ', 9370.599609375)
('TIMESTEP', 7245, '/ STATE', 'train', '/ EPSILON', 0.06938064999999576, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1306.8092, '/ Loss ', 8594.4287109375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7246, '/ STATE', 'train', '/ EPSILON', 0.06937065999999575, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1203.5435, '/ Loss ', 6317.638671875)
('TIMESTEP', 7

('TIMESTEP', 7280, '/ STATE', 'train', '/ EPSILON', 0.06913089999999572, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1332.8994, '/ Loss ', 11644.857421875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7281, '/ STATE', 'train', '/ EPSILON', 0.06912090999999572, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1380.3787, '/ Loss ', 8680.130859375)
('TIMESTEP', 7282, '/ STATE', 'train', '/ EPSILON', 0.06912090999999572, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1367.1201, '/ Loss ', 12092.359375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7283, '/ STATE', 'train', '/ EPSILON', 0.06911091999999572, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1

('TIMESTEP', 7318, '/ STATE', 'train', '/ EPSILON', 0.06888114999999569, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1174.2765, '/ Loss ', 6691.94921875)
('TIMESTEP', 7319, '/ STATE', 'train', '/ EPSILON', 0.06888114999999569, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1574.5527, '/ Loss ', 6241.53125)
('TIMESTEP', 7320, '/ STATE', 'train', '/ EPSILON', 0.06887115999999568, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 855.80597, '/ Loss ', 9895.890625)
('TIMESTEP', 7321, '/ STATE', 'train', '/ EPSILON', 0.06886116999999568, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1027.4856, '/ Loss ', 8387.2490234375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7322, '/ STATE', 'train', '/ EPSILON', 0.06885117999999568, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1158.1414, '/ Loss ', 7055.22802734375)
('TIMESTEP', 7323, '/ ST

('TIMESTEP', 7357, '/ STATE', 'train', '/ EPSILON', 0.06860142999999565, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1137.9343, '/ Loss ', 5635.1279296875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7358, '/ STATE', 'train', '/ EPSILON', 0.06859143999999565, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1209.8655, '/ Loss ', 7774.048828125)
('TIMESTEP', 7359, '/ STATE', 'train', '/ EPSILON', 0.06859143999999565, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1208.8282, '/ Loss ', 11479.53125)
('TIMESTEP', 7360, '/ STATE', 'train', '/ EPSILON', 0.06858144999999564, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 947.77539, '/ Loss ', 7243.64111328125)
('TIMESTEP', 7361, '/ STATE', 'train', '/ EPSILON', 0.06857145999999564, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1207.4611, '/ Loss ', 13628.4609375)
('TIMESTEP', 7362, 

('TIMESTEP', 7395, '/ STATE', 'train', '/ EPSILON', 0.0683017299999956, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1276.5331, '/ Loss ', 9140.81640625)
('TIMESTEP', 7396, '/ STATE', 'train', '/ EPSILON', 0.0682917399999956, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 1571.5494, '/ Loss ', 5433.0380859375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
('TIMESTEP', 7397, '/ STATE', 'train', '/ EPSILON', 0.0682817499999956, '/ ACTION', 1, '/ REWARD', 0, '/ Q_MAX ', 1002.7801, '/ Loss ', 6377.23779296875)
('TIMESTEP', 7398, '/ STATE', 'train', '/ EPSILON', 0.0682817499999956, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1255.4871, '/ Loss ', 8799.46484375)
('TIMESTEP', 7399, '/ STATE', 'train', '/ EPSILON', 0.0682717599999956, '/ ACTION', 1, '/ REWARD', -1, '/ Q_MAX ', 866.12561, '/ Loss ', 7329.8994140625)
----------Random Acti

('TIMESTEP', 7434, '/ STATE', 'train', '/ EPSILON', 0.06802200999999557, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1279.5867, '/ Loss ', 10843.0322265625)
('TIMESTEP', 7435, '/ STATE', 'train', '/ EPSILON', 0.06801201999999557, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1683.6973, '/ Loss ', 8533.34765625)
('TIMESTEP', 7436, '/ STATE', 'train', '/ EPSILON', 0.06800202999999556, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1204.2775, '/ Loss ', 6828.1484375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
----------Random Action----------
('TIMESTEP', 7437, '/ STATE', 'train', '/ EPSILON', 0.06799203999999556, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 1262.5453, '/ Loss ', 9267.408203125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0

 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7468, '/ STATE', 'train', '/ EPSILON', 0.06775227999999553, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 984.33093, '/ Loss ', 7009.833984375)
('TIMESTEP', 7469, '/ STATE', 'train', '/ EPSILON', 0.06775227999999553, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1406.8893, '/ Loss ', 7632.185546875)
('TIMESTEP', 7470, '/ STATE', 'train', '/ EPSILON', 0.06774228999999553, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 937.58484, '/ Loss ', 6923.12109375)
('TIMESTEP', 7471, '/ STATE', 'train', '/ EPSILON', 0.06773229999999553, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1164.6019, '/ Loss ', 8587.814453125)
('TIMESTEP', 7472, '/ STATE', 'train', '/ EPSILON', 0.06772230999999553, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 674.41345, '/ Loss ', 4471.01025390625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  

('TIMESTEP', 7505, '/ STATE', 'train', '/ EPSILON', 0.0674925399999955, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1390.326, '/ Loss ', 10007.5830078125)
('TIMESTEP', 7506, '/ STATE', 'train', '/ EPSILON', 0.06748254999999549, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 864.37085, '/ Loss ', 5012.77783203125)
('TIMESTEP', 7507, '/ STATE', 'train', '/ EPSILON', 0.06748254999999549, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1151.9121, '/ Loss ', 6814.6884765625)
('TIMESTEP', 7508, '/ STATE', 'train', '/ EPSILON', 0.06747255999999549, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1180.6875, '/ Loss ', 6737.734375)
('TIMESTEP', 7509, '/ STATE', 'train', '/ EPSILON', 0.06746256999999549, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1334.1927, '/ Loss ', 4317.6328125)
('TIMESTEP', 7510, '/ STATE', 'train', '/ EPSILON', 0.06745257999999549, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 803.42017, '/ Loss ', 6979.61767578125)
('TIMESTEP', 7511, '/ STATE', 'train', '/ EPSILON', 0.06744258999999549, '/

('TIMESTEP', 7552, '/ STATE', 'train', '/ EPSILON', 0.06713289999999544, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1060.2477, '/ Loss ', 9228.921875)
('TIMESTEP', 7553, '/ STATE', 'train', '/ EPSILON', 0.06712290999999544, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1141.2754, '/ Loss ', 7319.5478515625)
('TIMESTEP', 7554, '/ STATE', 'train', '/ EPSILON', 0.06711291999999544, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1163.3922, '/ Loss ', 9701.38671875)
('TIMESTEP', 7555, '/ STATE', 'train', '/ EPSILON', 0.06710292999999544, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1218.3932, '/ Loss ', 3841.9697265625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7556, '/ STATE', 'train', '/ EPSILON', 0.06709293999999544, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1114.2056, '/ Loss ', 10852.2890625)
-----------------

('TIMESTEP', 7592, '/ STATE', 'train', '/ EPSILON', 0.0668032299999954, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1094.9406, '/ Loss ', 7677.66015625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7593, '/ STATE', 'train', '/ EPSILON', 0.0667932399999954, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1166.322, '/ Loss ', 5109.298828125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7594, '/ STATE', 'train', '/ EPSILON', 0.0667832499999954, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1245.0482, '/ Loss ', 7140.744140625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  

 0  0  1  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
('TIMESTEP', 7629, '/ STATE', 'train', '/ EPSILON', 0.06653349999999536, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1143.5847, '/ Loss ', 4944.95751953125)
('TIMESTEP', 7630, '/ STATE', 'train', '/ EPSILON', 0.06653349999999536, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1018.1719, '/ Loss ', 5002.9287109375)
('TIMESTEP', 7631, '/ STATE', 'train', '/ EPSILON', 0.06652350999999536, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 736.53973, '/ Loss ', 5998.234375)
('TIMESTEP', 7632, '/ STATE', 'train', '/ EPSILON', 0.06651351999999536, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1114.6646, '/ Loss ', 5399.328125)
('TIMESTEP', 7633, '/ STATE', 'train', '/ EPSILON', 0.06650352999999536, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1160.1892, '/ Loss ', 6184.60498046875)
('TIMESTEP', 7634, '/ STATE', 'train', '/ EPSILON', 0.06649353999999535, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 843.39264, '/ Loss ', 6054.052734375)
----------Random Act

('TIMESTEP', 7670, '/ STATE', 'train', '/ EPSILON', 0.06620382999999531, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1044.7814, '/ Loss ', 6394.02099609375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
('TIMESTEP', 7671, '/ STATE', 'train', '/ EPSILON', 0.06619383999999531, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 778.20605, '/ Loss ', 5933.7119140625)
('TIMESTEP', 7672, '/ STATE', 'train', '/ EPSILON', 0.06619383999999531, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1338.9784, '/ Loss ', 10215.7978515625)
('TIMESTEP', 7673, '/ STATE', 'train', '/ EPSILON', 0.06618384999999531, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1035.5164, '/ Loss ', 8043.18896484375)
('TIMESTEP', 7674, '/ STATE', 'train', '/ EPSILON', 0.06617385999999531, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1109.592, '/ Loss ', 7887.3994140625)
('TIMES

('TIMESTEP', 7709, '/ STATE', 'train', '/ EPSILON', 0.06591411999999527, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1150.0094, '/ Loss ', 5071.4111328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7710, '/ STATE', 'train', '/ EPSILON', 0.06590412999999527, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1043.0914, '/ Loss ', 8432.7744140625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7711, '/ STATE', 'train', '/ EPSILON', 0.06589413999999527, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1145.6605, '/ Loss ', 5049.87646484375)
('TIMESTEP', 7712, '/ STATE', 'train', '/ EPSILON', 0.06589413999999527, '/ ACTION', 0, '/ REWARD', 0, '/ Q_M

('TIMESTEP', 7743, '/ STATE', 'train', '/ EPSILON', 0.06567435999999524, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1090.0924, '/ Loss ', 5356.99609375)
('TIMESTEP', 7744, '/ STATE', 'train', '/ EPSILON', 0.06567435999999524, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1314.8406, '/ Loss ', 7134.927734375)
('TIMESTEP', 7745, '/ STATE', 'train', '/ EPSILON', 0.06566436999999524, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1184.625, '/ Loss ', 7979.033203125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0 -1  0
('TIMESTEP', 7746, '/ STATE', 'train', '/ EPSILON', 0.06565437999999524, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1089.0756, '/ Loss ', 4492.607421875)
('TIMESTEP', 7747, '/ STATE', 'train', '/ EPSILON', 0.06565437999999524, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1497.817, '/ Loss ', 6372.263671875)
('TIMESTEP', 7748, '

('TIMESTEP', 7781, '/ STATE', 'train', '/ EPSILON', 0.0654146199999952, '/ ACTION', 17, '/ REWARD', 0, '/ Q_MAX ', 905.37811, '/ Loss ', 5267.6728515625)
('TIMESTEP', 7782, '/ STATE', 'train', '/ EPSILON', 0.0654146199999952, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1159.6613, '/ Loss ', 6390.2890625)
('TIMESTEP', 7783, '/ STATE', 'train', '/ EPSILON', 0.0654046299999952, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1050.3512, '/ Loss ', 5676.6669921875)
('TIMESTEP', 7784, '/ STATE', 'train', '/ EPSILON', 0.0654046299999952, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1082.0306, '/ Loss ', 6388.28076171875)
('TIMESTEP', 7785, '/ STATE', 'train', '/ EPSILON', 0.0653946399999952, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 940.14264, '/ Loss ', 9069.8359375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  1
 0  0  1  0 -1  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
('TIMESTEP', 7786, '/ 

('TIMESTEP', 7817, '/ STATE', 'train', '/ EPSILON', 0.06515487999999517, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 792.4975, '/ Loss ', 7793.85546875)
('TIMESTEP', 7818, '/ STATE', 'train', '/ EPSILON', 0.06514488999999517, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1116.2185, '/ Loss ', 5265.15625)
('TIMESTEP', 7819, '/ STATE', 'train', '/ EPSILON', 0.06513489999999517, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1173.0685, '/ Loss ', 6750.84814453125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7820, '/ STATE', 'train', '/ EPSILON', 0.06512490999999516, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1077.786, '/ Loss ', 5445.41796875)
('TIMESTEP', 7821, '/ STATE', 'train', '/ EPSILON', 0.06512490999999516, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1077.6614, '/ Loss ', 5607.69921875)
('TIMESTEP', 7822, '/ S

 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 7855, '/ STATE', 'train', '/ EPSILON', 0.06485517999999513, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 1212.1034, '/ Loss ', 3995.353515625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7856, '/ STATE', 'train', '/ EPSILON', 0.06484518999999513, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1130.174, '/ Loss ', 6188.748046875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7857, '/ STATE', 'train', '/ EPSILON', 0.06483519999999512, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1063.5424, '/ Loss ', 3150.98828125)
('TIMESTEP', 7858, '/ STATE', 'train', '/ EPSILON', 0.06483519

('TIMESTEP', 7890, '/ STATE', 'train', '/ EPSILON', 0.0646353999999951, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1102.8595, '/ Loss ', 5772.427734375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7891, '/ STATE', 'train', '/ EPSILON', 0.0646254099999951, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1064.4148, '/ Loss ', 7277.05419921875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7892, '/ STATE', 'train', '/ EPSILON', 0.0646154199999951, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1059.4124, '/ Loss ', 4000.069580078125)
('TIMESTEP', 7893, '/ STATE', 'train', '/ EPSILON', 0.0646154199999951, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX 

('TIMESTEP', 7928, '/ STATE', 'train', '/ EPSILON', 0.06433569999999506, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1091.6359, '/ Loss ', 5118.1162109375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 7929, '/ STATE', 'train', '/ EPSILON', 0.06432570999999505, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1097.9099, '/ Loss ', 6732.146484375)
('TIMESTEP', 7930, '/ STATE', 'train', '/ EPSILON', 0.06432570999999505, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 995.6936, '/ Loss ', 4014.55615234375)
('TIMESTEP', 7931, '/ STATE', 'train', '/ EPSILON', 0.06431571999999505, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1204.9241, '/ Loss ', 10832.28125)
('TIMESTEP', 7932, '/ STATE', 'train', '/ EPSILON', 0.06430572999999505, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1076.58, '/ Loss ', 8528.169921875)
('TIMESTEP', 7933, 

('TIMESTEP', 7962, '/ STATE', 'train', '/ EPSILON', 0.06409593999999502, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 858.87299, '/ Loss ', 5973.134765625)
('TIMESTEP', 7963, '/ STATE', 'train', '/ EPSILON', 0.06408594999999502, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 927.41913, '/ Loss ', 5852.5400390625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
('TIMESTEP', 7964, '/ STATE', 'train', '/ EPSILON', 0.06407595999999502, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1094.2217, '/ Loss ', 4710.23876953125)
('TIMESTEP', 7965, '/ STATE', 'train', '/ EPSILON', 0.06407595999999502, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1091.1819, '/ Loss ', 5542.3193359375)
('TIMESTEP', 7966, '/ STATE', 'train', '/ EPSILON', 0.06406596999999502, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 926.83923, '/ Loss ', 5040.64306640625)
--------

('TIMESTEP', 7997, '/ STATE', 'train', '/ EPSILON', 0.06382620999999498, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 963.02277, '/ Loss ', 5354.865234375)
('TIMESTEP', 7998, '/ STATE', 'train', '/ EPSILON', 0.06381621999999498, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1114.6119, '/ Loss ', 6095.3896484375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
('TIMESTEP', 7999, '/ STATE', 'train', '/ EPSILON', 0.06380622999999498, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1050.3047, '/ Loss ', 7612.32080078125)
Now we save model
('TIMESTEP', 8000, '/ STATE', 'train', '/ EPSILON', 0.06380622999999498, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1089.4415, '/ Loss ', 5253.13037109375)
('TIMESTEP', 8001, '/ STATE', 'train', '/ EPSILON', 0.06379623999999498, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1090.7517, '/ Loss ', 5925.82

('TIMESTEP', 8031, '/ STATE', 'train', '/ EPSILON', 0.06356646999999495, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 779.85388, '/ Loss ', 5313.619140625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8032, '/ STATE', 'train', '/ EPSILON', 0.06355647999999495, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 607.25519, '/ Loss ', 6265.6943359375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8033, '/ STATE', 'train', '/ EPSILON', 0.06354648999999495, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1041.8962, '/ Loss ', 6167.6630859375)
('TIMESTEP', 8034, '/ STATE', 'train', '/ EPSILON', 0.06354648999999495, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX

('TIMESTEP', 8069, '/ STATE', 'train', '/ EPSILON', 0.06328674999999491, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1163.08, '/ Loss ', 5135.2607421875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8070, '/ STATE', 'train', '/ EPSILON', 0.06327675999999491, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 747.90283, '/ Loss ', 4352.5634765625)
('TIMESTEP', 8071, '/ STATE', 'train', '/ EPSILON', 0.06327675999999491, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1042.0555, '/ Loss ', 6144.6962890625)
('TIMESTEP', 8072, '/ STATE', 'train', '/ EPSILON', 0.0632667699999949, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1101.9401, '/ Loss ', 6102.8779296875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  

('TIMESTEP', 8108, '/ STATE', 'train', '/ EPSILON', 0.06298704999999487, '/ ACTION', 24, '/ REWARD', 0, '/ Q_MAX ', 1034.6844, '/ Loss ', 5185.89111328125)
('TIMESTEP', 8109, '/ STATE', 'train', '/ EPSILON', 0.06298704999999487, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 832.63446, '/ Loss ', 5021.876953125)
('TIMESTEP', 8110, '/ STATE', 'train', '/ EPSILON', 0.06297705999999487, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1007.1169, '/ Loss ', 6735.6708984375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
-1  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  1  0  0  0
 1  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8111, '/ STATE', 'train', '/ EPSILON', 0.06296706999999487, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 795.8847, '/ Loss ', 4542.154296875)
('TIMESTEP', 8112, '/ STATE', 'train', '/ EPSILON', 0.06296706999999487, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1051.7374, '/ Loss ', 4454.7177734375)
('TIMESTEP', 8

('TIMESTEP', 8144, '/ STATE', 'train', '/ EPSILON', 0.06272730999999483, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1034.2766, '/ Loss ', 5548.4912109375)
('TIMESTEP', 8145, '/ STATE', 'train', '/ EPSILON', 0.06272730999999483, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 963.9328, '/ Loss ', 5165.0966796875)
('TIMESTEP', 8146, '/ STATE', 'train', '/ EPSILON', 0.06271731999999483, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1034.5028, '/ Loss ', 4794.1513671875)
('TIMESTEP', 8147, '/ STATE', 'train', '/ EPSILON', 0.06270732999999483, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1050.7839, '/ Loss ', 5040.58056640625)
('TIMESTEP', 8148, '/ STATE', 'train', '/ EPSILON', 0.06269733999999483, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1034.6129, '/ Loss ', 4556.755859375)
('TIMESTEP', 8149, '/ STATE', 'train', '/ EPSILON', 0.06268734999999483, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 961.48309, '/ Loss ', 5577.609375)
------------------------------------------FINAL-------------------------

('TIMESTEP', 8181, '/ STATE', 'train', '/ EPSILON', 0.062457579999994794, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 950.9184, '/ Loss ', 5991.8369140625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0 -1  0
 0  0  0  0  0  0
('TIMESTEP', 8182, '/ STATE', 'train', '/ EPSILON', 0.06244758999999479, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 992.04742, '/ Loss ', 5025.5107421875)
('TIMESTEP', 8183, '/ STATE', 'train', '/ EPSILON', 0.06244758999999479, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1047.0358, '/ Loss ', 5069.609375)
('TIMESTEP', 8184, '/ STATE', 'train', '/ EPSILON', 0.06243759999999479, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1082.4231, '/ Loss ', 5769.7314453125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0 

 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 8219, '/ STATE', 'train', '/ EPSILON', 0.062177859999994756, '/ ACTION', 12, '/ REWARD', 0, '/ Q_MAX ', 905.89111, '/ Loss ', 7027.033203125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 1  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8220, '/ STATE', 'train', '/ EPSILON', 0.062167869999994754, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1093.119, '/ Loss ', 5485.63818359375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8221, '/ STATE', 'train', '/ EPSILON', 0.06215787999999475, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1048.5439, '/ Loss ', 3934.29296875)
('TIMESTEP', 8222, '/ STATE', 'train', '

('TIMESTEP', 8253, '/ STATE', 'train', '/ EPSILON', 0.06191811999999472, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 748.45825, '/ Loss ', 4703.6845703125)
('TIMESTEP', 8254, '/ STATE', 'train', '/ EPSILON', 0.06190812999999472, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1087.3998, '/ Loss ', 3922.32958984375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
('TIMESTEP', 8255, '/ STATE', 'train', '/ EPSILON', 0.06189813999999472, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1001.2196, '/ Loss ', 5755.302734375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8256, '/ STATE', 'train', '/ EPSILON', 0.061888149999994715, '/ ACTION', 20, '/ REWARD', 0, '/ 

('TIMESTEP', 8292, '/ STATE', 'train', '/ EPSILON', 0.06163839999999468, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 734.05481, '/ Loss ', 4057.12744140625)
('TIMESTEP', 8293, '/ STATE', 'train', '/ EPSILON', 0.06162840999999468, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 538.04016, '/ Loss ', 6707.662109375)
('TIMESTEP', 8294, '/ STATE', 'train', '/ EPSILON', 0.06161841999999468, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1028.4823, '/ Loss ', 5867.1240234375)
('TIMESTEP', 8295, '/ STATE', 'train', '/ EPSILON', 0.06160842999999468, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 983.44, '/ Loss ', 4502.658203125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
('TIMESTEP', 8296, '/ STATE', 'train', '/ EPSILON', 0.061598439999994675, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 823.71283, '/ Loss ', 6207.7509765625)
('TIMESTEP',

('TIMESTEP', 8326, '/ STATE', 'train', '/ EPSILON', 0.061388649999994646, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 997.93579, '/ Loss ', 4268.11962890625)
('TIMESTEP', 8327, '/ STATE', 'train', '/ EPSILON', 0.061388649999994646, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 621.18909, '/ Loss ', 6112.9833984375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8328, '/ STATE', 'train', '/ EPSILON', 0.061378659999994645, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1040.3824, '/ Loss ', 4465.2626953125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8329, '/ STATE', 'train', '/ EPSILON', 0.06136866999999464, '/ ACTION', 20, '/ REWARD', 0, '/

 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8364, '/ STATE', 'train', '/ EPSILON', 0.06111891999999461, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 729.74365, '/ Loss ', 4779.0341796875)
('TIMESTEP', 8365, '/ STATE', 'train', '/ EPSILON', 0.06111891999999461, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 895.33929, '/ Loss ', 4959.68408203125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8366, '/ STATE', 'train', '/ EPSILON', 0.06110892999999461, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 895.37787, '/ Loss ', 4481.3232421875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8367, '/ STATE', 'train', '/ EPSILON', 0.061098939999994606,

('TIMESTEP', 8402, '/ STATE', 'train', '/ EPSILON', 0.060869169999994574, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 734.69794, '/ Loss ', 6175.6357421875)
('TIMESTEP', 8403, '/ STATE', 'train', '/ EPSILON', 0.06085917999999457, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 735.008, '/ Loss ', 5613.1806640625)
('TIMESTEP', 8404, '/ STATE', 'train', '/ EPSILON', 0.06084918999999457, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 779.30542, '/ Loss ', 6816.92724609375)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
-1  0  0  0  0  0
('TIMESTEP', 8405, '/ STATE', 'train', '/ EPSILON', 0.06083919999999457, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 988.26349, '/ Loss ', 3977.40673828125)
('TIMESTEP', 8406, '/ STATE', 'train', '/ EPSILON', 0.06083919999999457, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1063.8892, '/ Loss ', 4284.1416015625)
('TIMESTEP

('TIMESTEP', 8436, '/ STATE', 'train', '/ EPSILON', 0.06062940999999454, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1033.1088, '/ Loss ', 4935.07080078125)
('TIMESTEP', 8437, '/ STATE', 'train', '/ EPSILON', 0.06061941999999454, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 973.18909, '/ Loss ', 6587.9853515625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8438, '/ STATE', 'train', '/ EPSILON', 0.06060942999999454, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1012.4857, '/ Loss ', 3872.7294921875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8439, '/ STATE', 'train', '/ EPSILON', 0.06059943999999454, '/ ACTION', 20, '/ REWARD', 0, '/ 

('TIMESTEP', 8481, '/ STATE', 'train', '/ EPSILON', 0.060299739999994495, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 968.20795, '/ Loss ', 4854.755859375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8482, '/ STATE', 'train', '/ EPSILON', 0.060289749999994494, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1056.8304, '/ Loss ', 4859.74609375)
('TIMESTEP', 8483, '/ STATE', 'train', '/ EPSILON', 0.060289749999994494, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1042.2274, '/ Loss ', 8110.03515625)
('TIMESTEP', 8484, '/ STATE', 'train', '/ EPSILON', 0.06027975999999449, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1053.929, '/ Loss ', 4083.945556640625)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1

('TIMESTEP', 8519, '/ STATE', 'train', '/ EPSILON', 0.060010029999994455, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1050.522, '/ Loss ', 5201.89892578125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8520, '/ STATE', 'train', '/ EPSILON', 0.06000003999999445, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1042.0925, '/ Loss ', 5152.91845703125)
('TIMESTEP', 8521, '/ STATE', 'train', '/ EPSILON', 0.06000003999999445, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 983.95032, '/ Loss ', 4479.6591796875)
('TIMESTEP', 8522, '/ STATE', 'train', '/ EPSILON', 0.05999004999999445, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 749.88733, '/ Loss ', 3787.4541015625)
('TIMESTEP', 8523, '/ STATE', 'train', '/ EPSILON', 0.05998005999999445, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 815.42151, '/ Loss ', 4463.62451171875)
('TIME

 0  0  0  0  0  0
('TIMESTEP', 8550, '/ STATE', 'train', '/ EPSILON', 0.05978025999999442, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 808.89783, '/ Loss ', 4533.7275390625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8551, '/ STATE', 'train', '/ EPSILON', 0.05977026999999442, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 755.9223, '/ Loss ', 5288.2998046875)
('TIMESTEP', 8552, '/ STATE', 'train', '/ EPSILON', 0.05977026999999442, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 955.87842, '/ Loss ', 3801.3486328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
('TIMESTEP', 8553, '/ STATE', 'train', '/ EPSILON', 0.05976027999999442, '/ ACTION', 20, '/ R

('TIMESTEP', 8591, '/ STATE', 'train', '/ EPSILON', 0.05948055999999438, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1109.1274, '/ Loss ', 4400.0986328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0 -1
 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 8592, '/ STATE', 'train', '/ EPSILON', 0.05947056999999438, '/ ACTION', 22, '/ REWARD', 0, '/ Q_MAX ', 964.18988, '/ Loss ', 4795.7158203125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 8593, '/ STATE', 'train', '/ EPSILON', 0.05946057999999438, '/ ACTION', 19, '/ REWARD', 0, '/ Q_MAX ', 975.31555, '/ Loss ', 5125.06884765625)
-----------------------------------------

('TIMESTEP', 8629, '/ STATE', 'train', '/ EPSILON', 0.05919084999999434, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 737.44202, '/ Loss ', 5622.21337890625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 8630, '/ STATE', 'train', '/ EPSILON', 0.05918085999999434, '/ ACTION', 7, '/ REWARD', 0, '/ Q_MAX ', 766.40942, '/ Loss ', 4527.59130859375)
('TIMESTEP', 8631, '/ STATE', 'train', '/ EPSILON', 0.05918085999999434, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1088.9811, '/ Loss ', 6031.0361328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  1  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0 -1  0  0  0  0
('TIMESTEP', 8632, '/ STATE', 'train', '/ EPSILON', 0.05917086999999434, '/

('TIMESTEP', 8664, '/ STATE', 'train', '/ EPSILON', 0.058941099999994306, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1063.3285, '/ Loss ', 5674.138671875)
('TIMESTEP', 8665, '/ STATE', 'train', '/ EPSILON', 0.058931109999994305, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1063.7516, '/ Loss ', 6295.904296875)
('TIMESTEP', 8666, '/ STATE', 'train', '/ EPSILON', 0.058921119999994304, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1064.0905, '/ Loss ', 8153.43017578125)
('TIMESTEP', 8667, '/ STATE', 'train', '/ EPSILON', 0.0589111299999943, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1074.5002, '/ Loss ', 5344.4921875)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8668, '/ STATE', 'train', '/ EPSILON', 0.0589011399999943, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1064.6895, '/ Loss ', 5019.1904296875)
('TIMESTEP',

('TIMESTEP', 8699, '/ STATE', 'train', '/ EPSILON', 0.05869134999999427, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 774.40247, '/ Loss ', 5723.1337890625)
('TIMESTEP', 8700, '/ STATE', 'train', '/ EPSILON', 0.05869134999999427, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 979.17395, '/ Loss ', 4777.60986328125)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8701, '/ STATE', 'train', '/ EPSILON', 0.05868135999999427, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1067.6589, '/ Loss ', 5807.7412109375)
('TIMESTEP', 8702, '/ STATE', 'train', '/ EPSILON', 0.05868135999999427, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1024.042, '/ Loss ', 5673.2412109375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  1  

('TIMESTEP', 8734, '/ STATE', 'train', '/ EPSILON', 0.058421619999994234, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 1066.1202, '/ Loss ', 4614.85302734375)
('TIMESTEP', 8735, '/ STATE', 'train', '/ EPSILON', 0.058421619999994234, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1000.0929, '/ Loss ', 4276.55810546875)
('TIMESTEP', 8736, '/ STATE', 'train', '/ EPSILON', 0.05841162999999423, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1111.3035, '/ Loss ', 4753.1787109375)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  1
 0  0  0  0  0  0
 0  0  1  0 -1  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8737, '/ STATE', 'train', '/ EPSILON', 0.05840163999999423, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 1162.6317, '/ Loss ', 5551.98681640625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  

('TIMESTEP', 8773, '/ STATE', 'train', '/ EPSILON', 0.058141899999994195, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1078.6416, '/ Loss ', 5081.60546875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0 -1
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
----------Random Action----------
('TIMESTEP', 8774, '/ STATE', 'train', '/ EPSILON', 0.058131909999994194, '/ ACTION', 11, '/ REWARD', 0, '/ Q_MAX ', 1073.2344, '/ Loss ', 4481.5732421875)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8775, '/ STATE', 'train', '/ EPSILON', 0.05812191999999419, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 777.13641, '/ Loss ', 3818.507080078125)
('TIMESTEP', 8776, '/ STATE', 'train', '/ EPSILON', 0.05812191999999419, '

('TIMESTEP', 8817, '/ STATE', 'train', '/ EPSILON', 0.05780223999999415, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1154.3971, '/ Loss ', 5366.517578125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0 -1  0  0  0
('TIMESTEP', 8818, '/ STATE', 'train', '/ EPSILON', 0.05779224999999415, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 768.43549, '/ Loss ', 4388.67529296875)
('TIMESTEP', 8819, '/ STATE', 'train', '/ EPSILON', 0.05779224999999415, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1128.2113, '/ Loss ', 6557.61474609375)
('TIMESTEP', 8820, '/ STATE', 'train', '/ EPSILON', 0.057782259999994146, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 867.56274, '/ Loss ', 4916.68701171875)
('TIMESTEP', 8821, '/ STATE', 'train', '/ EPSILON', 0.057772269999994144, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1006.8785, '/ Loss ', 4914.4990234375)
-----

('TIMESTEP', 8854, '/ STATE', 'train', '/ EPSILON', 0.05753250999999411, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1082.7772, '/ Loss ', 6481.66357421875)
('TIMESTEP', 8855, '/ STATE', 'train', '/ EPSILON', 0.05752251999999411, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1087.6664, '/ Loss ', 5489.2783203125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0 -1  0  0
 0  0  0  0  0  0
('TIMESTEP', 8856, '/ STATE', 'train', '/ EPSILON', 0.05751252999999411, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 918.74738, '/ Loss ', 4270.0224609375)
('TIMESTEP', 8857, '/ STATE', 'train', '/ EPSILON', 0.05751252999999411, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 1035.6465, '/ Loss ', 4522.6171875)
('TIMESTEP', 8858, '/ STATE', 'train', '/ EPSILON', 0.05750253999999411, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1014.6243, '/ Loss ', 5596.958984375)
------------

('TIMESTEP', 8892, '/ STATE', 'train', '/ EPSILON', 0.05726277999999407, '/ ACTION', 20, '/ REWARD', -1, '/ Q_MAX ', 1090.2167, '/ Loss ', 6366.908203125)
------------------------------------------FINAL-----------------------------------------------
Reward: -1
 0  0 -1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8893, '/ STATE', 'train', '/ EPSILON', 0.05725278999999407, '/ ACTION', 20, '/ REWARD', 0, '/ Q_MAX ', 794.95599, '/ Loss ', 7612.91259765625)
('TIMESTEP', 8894, '/ STATE', 'train', '/ EPSILON', 0.05725278999999407, '/ ACTION', 0, '/ REWARD', 0, '/ Q_MAX ', 637.31885, '/ Loss ', 7188.056640625)
------------------------------------------FINAL-----------------------------------------------
Reward: 0
 0  0  0  0  0  0
 0  0  0  0  0  0
 0  0  0 -1  0  0
 0  0  1  0  0  0
 0  0  0  0  0  0
 0  0  0  0  0  0
('TIMESTEP', 8895, '/ STATE', 'train', '/ EPSILON', 0.05724279999999407, '/ ACTION', 20, '/ REWARD', 0, '/ Q_

KeyboardInterrupt: 

In [None]:
print(game_state.next_state(22,(1,[[1, -1, -1, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]])))

In [None]:
print(game_state.initial)