In [34]:
import numpy as np
from PIL import ImageGrab
from PIL import Image
import cv2
import io; 
import time

In [112]:
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# Relative path of the local game page; Game.__init__ makes it absolute and opens it.
game_url = "game/dino.html"
# Location of the chromedriver executable handed to webdriver.Chrome in Game.__init__.
chrome_driver_path = "../chromedriver.exe"
class Game:
    """Selenium wrapper around the dino game page.

    Game state is read and controlled by executing JavaScript against the
    page's ``Runner`` object; key presses are sent to the ``<body>`` element.

    NOTE(review): this uses the older Selenium call style
    (``executable_path``/``chrome_options`` keywords and
    ``find_element_by_tag_name``); confirm it matches the installed Selenium
    version before upgrading.
    """

    def __init__(self,custom_config=True):
        """Start Chrome, position/size the window and load the game page.

        Args:
            custom_config: when true, ``Runner.config.ACCELERATION`` is set
                to 0 in the page after it has loaded.
        """
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,chrome_options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.set_window_size(200, 300)
        self._driver.get(os.path.abspath(game_url))
        if custom_config:
            self._driver.execute_script("Runner.config.ACCELERATION=0")

    def get_crashed(self):
        """Return the page's ``Runner.instance_.crashed`` value."""
        return self._driver.execute_script("return Runner.instance_.crashed")

    def get_playing(self):
        """Return the page's ``Runner.instance_.playing`` value."""
        return self._driver.execute_script("return Runner.instance_.playing")

    def restart(self):
        """Call ``Runner.instance_.restart()`` in the page."""
        return self._driver.execute_script("Runner.instance_.restart()")

    def press_up(self):
        """Send an UP-arrow key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)

    def press_down(self):
        """Send a DOWN-arrow key press to the page body."""
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_DOWN)

    def get_score(self):
        """Return the current score as an int.

        The page exposes the score as a sequence of digits. If that sequence
        is empty or null (nothing to join), 0 is returned instead of failing
        on ``int('')``.
        """
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        # str() each entry so both digit strings and numeric digits can be joined.
        score = ''.join(str(digit) for digit in (score_array or []))
        return int(score) if score else 0

    def pause(self):
        """Call ``Runner.instance_.stop()`` in the page and return its result."""
        return self._driver.execute_script("return Runner.instance_.stop()")

    def resume(self):
        """Call ``Runner.instance_.play()`` in the page and return its result."""
        return self._driver.execute_script("return Runner.instance_.play()")

    def end(self):
        """Shut the browser down.

        ``quit()`` is used rather than ``close()`` so the whole WebDriver
        session (browser windows and the driver process) is released, not
        just the current window.
        """
        self._driver.quit()

In [113]:
class DinoAgent:
    """Agent-side handle that maps actions onto key presses of a game object."""

    def __init__(self, game):
        """Store the game handle and jump once straight away.

        Args:
            game: object providing ``press_up``, ``press_down``,
                ``get_playing`` and ``get_crashed``.
        """
        self._game = game
        # Initial key press (presumably what gets the run going - confirm).
        self.jump()

    def jump(self):
        """Press UP, then block for half a second."""
        self._game.press_up()
        time.sleep(0.5)

    def duck(self):
        """Press DOWN."""
        self._game.press_down()

    def is_running(self):
        """Return the game's ``get_playing()`` result."""
        return self._game.get_playing()

    def is_crashed(self):
        """Return the game's ``get_crashed()`` result."""
        return self._game.get_crashed()

In [92]:
# Reduce a raw screen capture to the small edge map used as one network frame.
def process_img(image):
    """Edge-detect, downscale and crop a captured frame.

    Args:
        image: image array accepted by ``cv2.Canny``.

    Returns:
        The Canny edge map, scaled to 25% width / 20% height and cropped to
        rows 2..41 and columns 10..89.
    """
    # The game canvas is already greyscale; Canny keeps only the edges and
    # drops unwanted objects such as clouds.
    edges = cv2.Canny(image, threshold1=100, threshold2=200)
    shrunk = cv2.resize(edges, (0, 0), fx=0.25, fy=0.20)
    cropped = shrunk[2:42, 10:90]  # img[y:y+h, x:x+w]
    return cropped

In [108]:
def grab_screen(_driver = None):
    """Capture the current game frame.

    Args:
        _driver: optional object with ``get_screenshot_as_png()``. When given,
            its screenshot is returned and the desktop is not captured at all
            (previously the desktop was grabbed and processed, then discarded).

    Returns:
        Without ``_driver``: the ``process_img`` result for the desktop region
        ``bbox=(0,180,400,400)``.
        With ``_driver``: whatever ``_driver.get_screenshot_as_png()`` returns.
    """
    if _driver is not None:
        # NOTE(review): this branch returns the driver's raw screenshot, not a
        # process_img() frame like the default branch - confirm callers that
        # pass a driver expect that.
        return _driver.get_screenshot_as_png()
    screen = np.array(ImageGrab.grab(bbox=(0,180,400,400)))
    return process_img(screen)

In [96]:
# # print(grab_screen().shape)
# game = Game()
# dino = DinoAgent(game)
# last_time = time.time()
# while(True):
    
# # #     print('loop took {} seconds'.format(time.time()-last_time))
# # #     last_time = time.time()
# # #     cv2.imwrite("./img_data/dino"+str(time())+".jpg",image)
# # #     dino.duck()
# #     #exit on q press
# # #     print('{0} {1} '.format(r_t,end_t))
# # #     cv2.imshow('window',game.grab_screen())
    
#     image = grab_screen()
#     cv2.imshow('window',image)
#     print(image.shape[0])
# #     cv2.destroyAllWindows()

# # #     from matplotlib import pyplot as plt
# # #     plt.imshow(image)
# # #     plt.title('my picture')
# # #     plt.show()

# # #     grab_screen()
# #     if(dino.is_crashed()):
# #         #jumping starts the game again if dino has crashed
# # #         temp = (game.get_score())
# #         game.restart()
#     if (cv2.waitKey(25) & 0xFF == ord('q')):
#         cv2.destroyAllWindows()
#         game.end()
# #         cv2.imwrite('dino.jpg',image)
#         break

In [124]:
# Game / training hyper-parameters read by buildmodel() and trainNetwork().
ACTIONS = 2 # number of possible actions: index 0 = do nothing, index 1 = jump
GAMMA = 0.99 # discount factor applied to the next state's best Q-value in the training target
OBSERVATION = 500 # timesteps spent only collecting transitions before training starts (10000. noted as an alternative)
EXPLORE = 3000000. # number of steps over which epsilon is annealed from INITIAL_EPSILON to FINAL_EPSILON
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.1 # starting value of epsilon
REPLAY_MEMORY = 50000 # maximum number of past transitions kept in the replay memory
BATCH = 32 # size of the minibatch sampled from the replay memory
FRAME_PER_ACTION = 10 # an action is chosen only on timesteps divisible by this; otherwise action 0 is used
LEARNING_RATE = 1e-4 # learning rate passed to the Adam optimizer

In [90]:
# Frame geometry for the network input, which buildmodel() declares as
# (img_cols, img_rows, img_channels) = (40, 80, 4).
# NOTE: despite the names, process_img() crops frames to at most 40 rows by
# 80 columns, so img_cols holds the frame height and img_rows the frame width.
img_rows , img_cols = 80,40
img_channels = 4 # four frames are stacked along the last axis

In [33]:
def buildmodel():
    """Build and compile the convolutional Q-network.

    Architecture: three convolution + ReLU stages (32 filters 8x8 stride 4,
    64 filters 4x4 stride 2, 64 filters 3x3 stride 1, all 'same' padding),
    then Flatten, Dense(512) + ReLU and a final Dense(ACTIONS) layer with no
    activation. The model is compiled with MSE loss and Adam(LEARNING_RATE).

    The convolution layers use the Keras 2 argument names (kernel size as a
    tuple, ``strides``, ``padding``) instead of the Keras 1 spelling
    (``subsample``, ``border_mode``, separate kernel dimensions).

    Returns:
        The compiled Sequential model.
    """
    print("Now we build the model")
    model = Sequential()
    # Input is (img_cols, img_rows, img_channels), i.e. 40*80*4 with the
    # notebook's settings.
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), padding='same',
                            input_shape=(img_cols, img_rows, img_channels)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), padding='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (3, 3), strides=(1, 1), padding='same'))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    # One linear output per action.
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    print("We finish building the model")
    return model

In [118]:
def _replay_update(model, minibatch):
    """Run one experience-replay training step on sampled transitions.

    Each transition is ``(state, action_index, reward, next_state, terminal)``
    where both states have a leading batch axis of size 1.

    The Q-values for all states and all next states are predicted in two
    batched calls (instead of two calls per transition). For every transition
    only the entry of the action actually taken is replaced: by the reward if
    the transition is terminal, otherwise by
    ``reward + GAMMA * max(Q(next_state))``.

    Returns:
        ``(loss, last_next_q)``: the ``model.train_on_batch`` result and the
        predicted Q-values for the last transition's next state (used only
        for the progress log).
    """
    states = np.concatenate([tr[0] for tr in minibatch]).astype(np.float64)
    next_states = np.concatenate([tr[3] for tr in minibatch]).astype(np.float64)

    # Writable float copy: start from the current predictions so untouched
    # actions contribute zero error.
    targets = np.array(model.predict(states), dtype=np.float64)
    next_q = model.predict(next_states)

    for i, (_, action_i, reward_i, _, terminal_i) in enumerate(minibatch):
        if terminal_i:
            # Terminal transition: the target is just the reward.
            targets[i, action_i] = reward_i
        else:
            targets[i, action_i] = reward_i + GAMMA * np.max(next_q[i])

    return model.train_on_batch(states, targets), next_q[-1]


def trainNetwork(model,game_state,observe=False):
    """Run the epsilon-greedy play / experience-replay training loop.

    Args:
        model: network providing ``predict``, ``train_on_batch``, ``compile``,
            ``load_weights``, ``save_weights`` and ``to_json``.
        game_state: object whose ``get_state(actions)`` returns
            ``(frame, reward, terminal)`` for a one-hot action vector.
        observe: when true, weights are loaded from ``model.h5``, epsilon is
            fixed at FINAL_EPSILON and no training happens; otherwise training
            starts after OBSERVATION timesteps.

    The loop has no exit condition: this function does not return normally
    and stops only when an exception (for example a keyboard interrupt)
    propagates out of it.
    """
    last_time = time.time()
    # Replay memory; maxlen drops the oldest transition automatically.
    D = deque(maxlen=REPLAY_MEMORY)

    # Get the first frame by doing nothing, and stack it four times to form
    # the initial state.
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, _, _ = game_state.get_state(do_nothing)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    # Keras expects a leading batch axis: (1, height, width, 4).
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])

    if observe:
        OBSERVE = 999999999    # keep observing, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
    else:
        # Training mode.
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    if observe:
        print ("Weight load successfully")

    t = 0
    while True:
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])

        # Choose an action epsilon-greedily, but only every FRAME_PER_ACTION
        # timesteps; otherwise a_t stays all-zero and action 0 is recorded.
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
            else:
                # Feed the stack of 4 frames, pick the highest predicted Q-value.
                action_index = np.argmax(model.predict(s_t))
            a_t[action_index] = 1

        # Anneal epsilon linearly once the observation phase is over.
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # Run the selected action and observe the next frame and reward.
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('loop took {} seconds'.format(time.time()-last_time))
        last_time = time.time()
        # New frame goes first, followed by the three most recent old frames.
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)

        # Store the transition in the replay memory.
        D.append((s_t, action_index, r_t, s_t1, terminal))

        # Only train once the observation phase is over.
        if t > OBSERVE:
            # Never ask for more samples than the memory currently holds.
            minibatch = random.sample(D, min(BATCH, len(D)))
            batch_loss, Q_sa = _replay_update(model, minibatch)
            loss += batch_loss

        s_t = s_t1
        t = t + 1

        # Save progress every 1000 iterations.
        if t % 1000 == 0:
            print("Now we save model")
            model.save_weights("model.h5", overwrite=True)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)

        # Progress log.
        if t <= OBSERVE:
            state = "observe"
        elif t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state, \
            "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, \
            "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)

In [29]:
def playGame(observe=False):
    """Open the game, build the model and run the training loop.

    Args:
        observe: forwarded to ``trainNetwork``.

    The browser opened by ``Game()`` is always released through
    ``game.end()``, including when setup or training stops with an exception
    or a keyboard interrupt; the exception itself still propagates.
    """
    game = Game()
    try:
        dino = DinoAgent(game)
        game_state = Game_sate(dino,game)
        model = buildmodel()
        trainNetwork(model,game_state,observe=observe)
    finally:
        game.end()

In [122]:
class Game_sate:
    """Environment step wrapper: applies an action, returns frame, reward, done."""

    def __init__(self,agent,game):
        """Keep the agent and game handles and start the preview coroutine."""
        self._agent = agent
        self._game = game
        self._display = show_img()
        # Advance the generator to its first yield so it can accept send().
        next(self._display)

    def get_state(self,actions):
        """Apply ``actions`` and return ``(image, reward, is_over)``.

        Args:
            actions: indexable action vector; a value of 1 at index 1 makes
                the agent jump (followed by an extra 0.2 s pause).

        Returns:
            The frame from ``grab_screen()``, a reward of 0.1 (or -1 when the
            agent reports a crash) and whether the episode ended. On a crash
            the game is restarted before the frame is captured. The frame is
            also sent to the preview window.
        """
        reward, is_over = 0.1, False

        if actions[1] == 1:
            self._agent.jump()
            time.sleep(0.2)

        if self._agent.is_crashed():
            self._game.restart()
            reward, is_over = -1, True

        image = grab_screen()
        self._display.send(image)
        return image, reward, is_over

In [17]:
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD , Adam
import tensorflow as tf
from collections import deque
import random
import json

Using TensorFlow backend.


In [18]:
def show_img():
    """Generator-based coroutine that shows each sent image in a window.

    Usage: create it, advance it once with ``next()``, then ``send(image)``
    for every frame. Each frame is drawn in the "preview" window. If ``q`` is
    the key read after drawing a frame, all OpenCV windows are destroyed and
    the generator finishes, so that ``send()`` call raises ``StopIteration``.
    """
    while True:
        screen = (yield)
        cv2.imshow("preview", screen)
        # waitKey(1) also gives the window a chance to redraw.
        if (cv2.waitKey(1) & 0xFF) == ord('q'):
            cv2.destroyAllWindows()
            break

In [19]:
def measure_performance():
    """Generator-based coroutine that prints the time elapsed between steps.

    Advance it once with ``next()``, then ``send(timestamp)`` at each step.
    Every send prints the difference between the current clock and the
    reference time, then makes the sent timestamp the new reference. The
    first reference is the clock value taken when the generator starts.
    """
    reference = time.time()
    while True:
        sent_stamp = yield
        elapsed = time.time() - reference
        print('step took {} seconds'.format(elapsed))
        reference = sent_stamp

In [125]:
playGame(observe=False)

Now we build the model
We finish building the model


  after removing the cwd from sys.path.
  
  


loop took 1.1554372310638428 seconds
TIMESTEP 1 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04964709281921387 seconds
TIMESTEP 2 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04808807373046875 seconds
TIMESTEP 3 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04950594902038574 seconds
TIMESTEP 4 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05154681205749512 seconds
TIMESTEP 5 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05414295196533203 seconds
TIMESTEP 6 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.048143625259399414 seconds
TIMESTEP 7 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05717134475708008 seconds
TIMESTEP 8 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop too

loop took 0.05417752265930176 seconds
TIMESTEP 70 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.7825241088867188 seconds
TIMESTEP 71 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04609394073486328 seconds
TIMESTEP 72 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05076146125793457 seconds
TIMESTEP 73 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04945802688598633 seconds
TIMESTEP 74 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04926347732543945 seconds
TIMESTEP 75 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05378460884094238 seconds
TIMESTEP 76 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04910087585449219 seconds
TIMESTEP 77 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
l

loop took 0.051252126693725586 seconds
TIMESTEP 136 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04913592338562012 seconds
TIMESTEP 137 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05020737648010254 seconds
TIMESTEP 138 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.049863576889038086 seconds
TIMESTEP 139 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.07336544990539551 seconds
TIMESTEP 140 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.7737030982971191 seconds
TIMESTEP 141 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD -1 / Q_MAX  0 / Loss  0
loop took 0.05006742477416992 seconds
TIMESTEP 142 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04947376251220703 seconds
TIMESTEP 143 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / 

loop took 0.048369407653808594 seconds
TIMESTEP 205 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05314040184020996 seconds
TIMESTEP 206 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04795050621032715 seconds
TIMESTEP 207 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.049056291580200195 seconds
TIMESTEP 208 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05340576171875 seconds
TIMESTEP 209 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04576563835144043 seconds
TIMESTEP 210 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.7934198379516602 seconds
TIMESTEP 211 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05734610557556152 seconds
TIMESTEP 212 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Lo

loop took 0.7951021194458008 seconds
TIMESTEP 271 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.050225019454956055 seconds
TIMESTEP 272 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05026721954345703 seconds
TIMESTEP 273 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.049894094467163086 seconds
TIMESTEP 274 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.051119089126586914 seconds
TIMESTEP 275 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.052126407623291016 seconds
TIMESTEP 276 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04778337478637695 seconds
TIMESTEP 277 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.050330162048339844 seconds
TIMESTEP 278 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  

loop took 0.04712653160095215 seconds
TIMESTEP 339 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05214810371398926 seconds
TIMESTEP 340 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.7769880294799805 seconds
TIMESTEP 341 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05047440528869629 seconds
TIMESTEP 342 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05048108100891113 seconds
TIMESTEP 343 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.049100399017333984 seconds
TIMESTEP 344 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.04951977729797363 seconds
TIMESTEP 345 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05199599266052246 seconds
TIMESTEP 346 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / 

TIMESTEP 405 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05412626266479492 seconds
TIMESTEP 406 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.06207537651062012 seconds
TIMESTEP 407 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05030965805053711 seconds
TIMESTEP 408 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.050331830978393555 seconds
TIMESTEP 409 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.050575971603393555 seconds
TIMESTEP 410 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.7975664138793945 seconds
TIMESTEP 411 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.0503849983215332 seconds
TIMESTEP 412 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.0516057014465332 s

loop took 0.7921309471130371 seconds
TIMESTEP 471 / STATE observe / EPSILON 0.1 / ACTION 1 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05519914627075195 seconds
TIMESTEP 472 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05112957954406738 seconds
TIMESTEP 473 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.048288822174072266 seconds
TIMESTEP 474 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05114006996154785 seconds
TIMESTEP 475 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.0505833625793457 seconds
TIMESTEP 476 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.048711299896240234 seconds
TIMESTEP 477 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / Loss  0
loop took 0.05012321472167969 seconds
TIMESTEP 478 / STATE observe / EPSILON 0.1 / ACTION 0 / REWARD 0.1 / Q_MAX  0 / 

TIMESTEP 529 / STATE explore / EPSILON 0.09999906760000019 / ACTION 0 / REWARD 0.1 / Q_MAX  17.5364 / Loss  2.09858608246
loop took 0.2674679756164551 seconds
TIMESTEP 530 / STATE explore / EPSILON 0.09999903430000019 / ACTION 0 / REWARD 0.1 / Q_MAX  7.02163 / Loss  4.02357625961
loop took 0.27172255516052246 seconds
TIMESTEP 531 / STATE explore / EPSILON 0.0999990010000002 / ACTION 0 / REWARD 0.1 / Q_MAX  11.3673 / Loss  6.24352931976
loop took 0.2606940269470215 seconds
TIMESTEP 532 / STATE explore / EPSILON 0.0999989677000002 / ACTION 0 / REWARD 0.1 / Q_MAX  14.8365 / Loss  1.92057216167
loop took 0.2654876708984375 seconds
TIMESTEP 533 / STATE explore / EPSILON 0.09999893440000021 / ACTION 0 / REWARD 0.1 / Q_MAX  17.3212 / Loss  3.33626675606
loop took 0.2664623260498047 seconds
TIMESTEP 534 / STATE explore / EPSILON 0.09999890110000022 / ACTION 0 / REWARD 0.1 / Q_MAX  17.4759 / Loss  9.05869197845
loop took 0.28385186195373535 seconds
TIMESTEP 535 / STATE explore / EPSILON 0.09999

TIMESTEP 581 / STATE explore / EPSILON 0.09999733600000052 / ACTION 0 / REWARD 0.1 / Q_MAX  12.5892 / Loss  2.57157468796
loop took 0.2647073268890381 seconds
TIMESTEP 582 / STATE explore / EPSILON 0.09999730270000053 / ACTION 0 / REWARD 0.1 / Q_MAX  13.8925 / Loss  0.781360864639
loop took 0.2635464668273926 seconds
TIMESTEP 583 / STATE explore / EPSILON 0.09999726940000053 / ACTION 0 / REWARD -1 / Q_MAX  13.7766 / Loss  5.27418279648
loop took 0.26973748207092285 seconds
TIMESTEP 584 / STATE explore / EPSILON 0.09999723610000054 / ACTION 0 / REWARD 0.1 / Q_MAX  13.4546 / Loss  2.1119248867
loop took 0.26746296882629395 seconds
TIMESTEP 585 / STATE explore / EPSILON 0.09999720280000055 / ACTION 0 / REWARD 0.1 / Q_MAX  13.8192 / Loss  1.55677437782
loop took 0.27750682830810547 seconds
TIMESTEP 586 / STATE explore / EPSILON 0.09999716950000055 / ACTION 0 / REWARD 0.1 / Q_MAX  12.5915 / Loss  1.37433552742
loop took 0.26906347274780273 seconds
TIMESTEP 587 / STATE explore / EPSILON 0.09

TIMESTEP 633 / STATE explore / EPSILON 0.09999560440000085 / ACTION 0 / REWARD 0.1 / Q_MAX  10.3351 / Loss  1.3217279911
loop took 0.26671791076660156 seconds
TIMESTEP 634 / STATE explore / EPSILON 0.09999557110000086 / ACTION 0 / REWARD 0.1 / Q_MAX  8.59366 / Loss  1.31859087944
loop took 0.24807453155517578 seconds
TIMESTEP 635 / STATE explore / EPSILON 0.09999553780000087 / ACTION 0 / REWARD -1 / Q_MAX  8.11859 / Loss  2.01027345657
loop took 0.25066518783569336 seconds
TIMESTEP 636 / STATE explore / EPSILON 0.09999550450000087 / ACTION 0 / REWARD 0.1 / Q_MAX  8.40097 / Loss  2.0291261673
loop took 0.26570677757263184 seconds
TIMESTEP 637 / STATE explore / EPSILON 0.09999547120000088 / ACTION 0 / REWARD 0.1 / Q_MAX  8.48242 / Loss  0.735618829727
loop took 0.27974724769592285 seconds
TIMESTEP 638 / STATE explore / EPSILON 0.09999543790000089 / ACTION 0 / REWARD 0.1 / Q_MAX  8.15234 / Loss  0.833677589893
loop took 0.28026533126831055 seconds
TIMESTEP 639 / STATE explore / EPSILON 0.

TIMESTEP 685 / STATE explore / EPSILON 0.09999387280000119 / ACTION 0 / REWARD -1 / Q_MAX  4.28962 / Loss  0.124711513519
loop took 0.2738037109375 seconds
TIMESTEP 686 / STATE explore / EPSILON 0.0999938395000012 / ACTION 0 / REWARD 0.1 / Q_MAX  6.51816 / Loss  0.302689731121
loop took 0.26269030570983887 seconds
TIMESTEP 687 / STATE explore / EPSILON 0.0999938062000012 / ACTION 0 / REWARD 0.1 / Q_MAX  4.4572 / Loss  0.570441067219
loop took 0.2787458896636963 seconds
TIMESTEP 688 / STATE explore / EPSILON 0.09999377290000121 / ACTION 0 / REWARD 0.1 / Q_MAX  5.10352 / Loss  0.490962147713
loop took 0.26573872566223145 seconds
TIMESTEP 689 / STATE explore / EPSILON 0.09999373960000121 / ACTION 0 / REWARD 0.1 / Q_MAX  5.08739 / Loss  0.304995834827
loop took 0.24965810775756836 seconds
TIMESTEP 690 / STATE explore / EPSILON 0.09999370630000122 / ACTION 0 / REWARD 0.1 / Q_MAX  4.07791 / Loss  0.254730165005
loop took 0.26676034927368164 seconds
TIMESTEP 691 / STATE explore / EPSILON 0.09

TIMESTEP 737 / STATE explore / EPSILON 0.09999214120000152 / ACTION 0 / REWARD 0.1 / Q_MAX  5.96755 / Loss  0.816492557526
loop took 0.26871776580810547 seconds
TIMESTEP 738 / STATE explore / EPSILON 0.09999210790000153 / ACTION 0 / REWARD 0.1 / Q_MAX  4.08338 / Loss  0.168796747923
loop took 0.28974413871765137 seconds
TIMESTEP 739 / STATE explore / EPSILON 0.09999207460000153 / ACTION 0 / REWARD 0.1 / Q_MAX  4.46063 / Loss  0.461394816637
loop took 0.28990983963012695 seconds
TIMESTEP 740 / STATE explore / EPSILON 0.09999204130000154 / ACTION 0 / REWARD -1 / Q_MAX  4.96852 / Loss  0.334900081158
loop took 0.26773643493652344 seconds
TIMESTEP 741 / STATE explore / EPSILON 0.09999200800000155 / ACTION 0 / REWARD 0.1 / Q_MAX  5.09665 / Loss  0.26780089736
loop took 0.29476404190063477 seconds
TIMESTEP 742 / STATE explore / EPSILON 0.09999197470000155 / ACTION 0 / REWARD 0.1 / Q_MAX  5.89519 / Loss  0.505500257015
loop took 0.3011155128479004 seconds
TIMESTEP 743 / STATE explore / EPSILO

TIMESTEP 788 / STATE explore / EPSILON 0.09999044290000185 / ACTION 0 / REWARD 0.1 / Q_MAX  4.28846 / Loss  0.149414300919
loop took 0.266704797744751 seconds
TIMESTEP 789 / STATE explore / EPSILON 0.09999040960000186 / ACTION 0 / REWARD 0.1 / Q_MAX  5.33595 / Loss  0.61033642292
loop took 0.27931904792785645 seconds
TIMESTEP 790 / STATE explore / EPSILON 0.09999037630000186 / ACTION 0 / REWARD 0.1 / Q_MAX  3.8388 / Loss  0.497801184654
loop took 1.015120506286621 seconds
TIMESTEP 791 / STATE explore / EPSILON 0.09999034300000187 / ACTION 1 / REWARD -1 / Q_MAX  3.18962 / Loss  0.195938169956
loop took 0.2706305980682373 seconds
TIMESTEP 792 / STATE explore / EPSILON 0.09999030970000188 / ACTION 0 / REWARD 0.1 / Q_MAX  3.8591 / Loss  0.300837308168
loop took 0.26800084114074707 seconds
TIMESTEP 793 / STATE explore / EPSILON 0.09999027640000188 / ACTION 0 / REWARD 0.1 / Q_MAX  3.57632 / Loss  0.551370978355
loop took 0.2657008171081543 seconds
TIMESTEP 794 / STATE explore / EPSILON 0.099

TIMESTEP 840 / STATE explore / EPSILON 0.09998871130000218 / ACTION 0 / REWARD 0.1 / Q_MAX  3.6561 / Loss  0.0558215565979
loop took 0.2868311405181885 seconds
TIMESTEP 841 / STATE explore / EPSILON 0.09998867800000219 / ACTION 0 / REWARD 0.1 / Q_MAX  3.69768 / Loss  1.46427834034
loop took 0.27673912048339844 seconds
TIMESTEP 842 / STATE explore / EPSILON 0.0999886447000022 / ACTION 0 / REWARD 0.1 / Q_MAX  3.59757 / Loss  0.29178571701
loop took 0.25734639167785645 seconds
TIMESTEP 843 / STATE explore / EPSILON 0.0999886114000022 / ACTION 0 / REWARD -1 / Q_MAX  4.0181 / Loss  0.227666437626
loop took 0.26470446586608887 seconds
TIMESTEP 844 / STATE explore / EPSILON 0.09998857810000221 / ACTION 0 / REWARD 0.1 / Q_MAX  3.8154 / Loss  0.117723211646
loop took 0.27883410453796387 seconds
TIMESTEP 845 / STATE explore / EPSILON 0.09998854480000222 / ACTION 0 / REWARD 0.1 / Q_MAX  3.84478 / Loss  1.44116222858
loop took 0.30521726608276367 seconds
TIMESTEP 846 / STATE explore / EPSILON 0.09

TIMESTEP 892 / STATE explore / EPSILON 0.09998697970000252 / ACTION 0 / REWARD 0.1 / Q_MAX  3.64019 / Loss  0.270430713892
loop took 0.2667102813720703 seconds
TIMESTEP 893 / STATE explore / EPSILON 0.09998694640000252 / ACTION 0 / REWARD 0.1 / Q_MAX  3.70868 / Loss  0.306335121393
loop took 0.2655296325683594 seconds
TIMESTEP 894 / STATE explore / EPSILON 0.09998691310000253 / ACTION 0 / REWARD 0.1 / Q_MAX  3.26778 / Loss  0.226859286427
loop took 0.2667045593261719 seconds
TIMESTEP 895 / STATE explore / EPSILON 0.09998687980000254 / ACTION 0 / REWARD 0.1 / Q_MAX  9.28698 / Loss  0.164645254612
loop took 0.24897027015686035 seconds
TIMESTEP 896 / STATE explore / EPSILON 0.09998684650000254 / ACTION 0 / REWARD -1 / Q_MAX  1.89303 / Loss  0.184655874968
loop took 0.26770567893981934 seconds
TIMESTEP 897 / STATE explore / EPSILON 0.09998681320000255 / ACTION 0 / REWARD 0.1 / Q_MAX  3.58976 / Loss  0.126876950264
loop took 0.275850772857666 seconds
TIMESTEP 898 / STATE explore / EPSILON 0

TIMESTEP 944 / STATE explore / EPSILON 0.09998524810000285 / ACTION 0 / REWARD 0.1 / Q_MAX  2.89948 / Loss  0.306496918201
loop took 0.2797715663909912 seconds
TIMESTEP 945 / STATE explore / EPSILON 0.09998521480000286 / ACTION 0 / REWARD -1 / Q_MAX  2.90317 / Loss  0.608721673489
loop took 0.28081536293029785 seconds
TIMESTEP 946 / STATE explore / EPSILON 0.09998518150000286 / ACTION 0 / REWARD 0.1 / Q_MAX  2.87757 / Loss  0.197635903955
loop took 0.3045828342437744 seconds
TIMESTEP 947 / STATE explore / EPSILON 0.09998514820000287 / ACTION 0 / REWARD 0.1 / Q_MAX  2.86852 / Loss  0.727216660976
loop took 0.26580309867858887 seconds
TIMESTEP 948 / STATE explore / EPSILON 0.09998511490000288 / ACTION 0 / REWARD 0.1 / Q_MAX  2.44561 / Loss  1.02252328396
loop took 0.2784247398376465 seconds
TIMESTEP 949 / STATE explore / EPSILON 0.09998508160000288 / ACTION 0 / REWARD 0.1 / Q_MAX  2.82972 / Loss  0.761111974716
loop took 0.2805051803588867 seconds
TIMESTEP 950 / STATE explore / EPSILON 0

TIMESTEP 996 / STATE explore / EPSILON 0.09998351650000319 / ACTION 0 / REWARD 0.1 / Q_MAX  3.19106 / Loss  0.305508136749
loop took 0.266707181930542 seconds
TIMESTEP 997 / STATE explore / EPSILON 0.09998348320000319 / ACTION 0 / REWARD 0.1 / Q_MAX  2.42996 / Loss  0.218197092414
loop took 0.2657158374786377 seconds
TIMESTEP 998 / STATE explore / EPSILON 0.0999834499000032 / ACTION 0 / REWARD 0.1 / Q_MAX  2.36864 / Loss  2.60536193848
loop took 0.2667093276977539 seconds
TIMESTEP 999 / STATE explore / EPSILON 0.0999834166000032 / ACTION 0 / REWARD 0.1 / Q_MAX  2.34154 / Loss  0.0355977863073
loop took 0.2611050605773926 seconds
Now we save model
TIMESTEP 1000 / STATE explore / EPSILON 0.09998338330000321 / ACTION 0 / REWARD -1 / Q_MAX  2.39845 / Loss  0.0355077087879
loop took 0.4287071228027344 seconds
TIMESTEP 1001 / STATE explore / EPSILON 0.09998335000000322 / ACTION 0 / REWARD 0.1 / Q_MAX  2.25355 / Loss  0.0550810843706
loop took 0.28768134117126465 seconds
TIMESTEP 1002 / STATE

TIMESTEP 1047 / STATE explore / EPSILON 0.09998181820000351 / ACTION 0 / REWARD 0.1 / Q_MAX  2.80125 / Loss  0.0681995898485
loop took 0.29578185081481934 seconds
TIMESTEP 1048 / STATE explore / EPSILON 0.09998178490000352 / ACTION 0 / REWARD 0.1 / Q_MAX  8.9123 / Loss  1.38542068005
loop took 0.28075218200683594 seconds
TIMESTEP 1049 / STATE explore / EPSILON 0.09998175160000353 / ACTION 0 / REWARD 0.1 / Q_MAX  0.463929 / Loss  0.169510602951
loop took 0.2873659133911133 seconds
TIMESTEP 1050 / STATE explore / EPSILON 0.09998171830000353 / ACTION 0 / REWARD 0.1 / Q_MAX  2.35348 / Loss  0.061589114368
loop took 0.29686737060546875 seconds
TIMESTEP 1051 / STATE explore / EPSILON 0.09998168500000354 / ACTION 0 / REWARD 0.1 / Q_MAX  2.40547 / Loss  0.0503859445453
loop took 0.30182456970214844 seconds
TIMESTEP 1052 / STATE explore / EPSILON 0.09998165170000355 / ACTION 0 / REWARD 0.1 / Q_MAX  2.65992 / Loss  0.0639514625072
loop took 0.27031588554382324 seconds
TIMESTEP 1053 / STATE explo

TIMESTEP 1098 / STATE explore / EPSILON 0.09998011990000384 / ACTION 0 / REWARD 0.1 / Q_MAX  1.11478 / Loss  0.0453124195337
loop took 0.2516815662384033 seconds
TIMESTEP 1099 / STATE explore / EPSILON 0.09998008660000385 / ACTION 0 / REWARD 0.1 / Q_MAX  3.40357 / Loss  0.0330956876278
loop took 0.26470518112182617 seconds
TIMESTEP 1100 / STATE explore / EPSILON 0.09998005330000385 / ACTION 0 / REWARD 0.1 / Q_MAX  1.54086 / Loss  0.110633172095
loop took 0.2667093276977539 seconds
TIMESTEP 1101 / STATE explore / EPSILON 0.09998002000000386 / ACTION 0 / REWARD 0.1 / Q_MAX  3.37777 / Loss  0.152082264423
loop took 0.29269862174987793 seconds
TIMESTEP 1102 / STATE explore / EPSILON 0.09997998670000387 / ACTION 0 / REWARD -1 / Q_MAX  2.63688 / Loss  0.127262055874
loop took 0.2697134017944336 seconds
TIMESTEP 1103 / STATE explore / EPSILON 0.09997995340000387 / ACTION 0 / REWARD 0.1 / Q_MAX  2.41029 / Loss  0.0743873864412
loop took 0.26812171936035156 seconds
TIMESTEP 1104 / STATE explore

TIMESTEP 1149 / STATE explore / EPSILON 0.09997842160000417 / ACTION 0 / REWARD 0.1 / Q_MAX  0.613032 / Loss  0.0649770647287
loop took 0.26770877838134766 seconds
TIMESTEP 1150 / STATE explore / EPSILON 0.09997838830000418 / ACTION 0 / REWARD 0.1 / Q_MAX  3.88483 / Loss  0.0691561847925
loop took 0.27636122703552246 seconds
TIMESTEP 1151 / STATE explore / EPSILON 0.09997835500000418 / ACTION 0 / REWARD 0.1 / Q_MAX  2.41167 / Loss  0.0276343002915
loop took 0.2876279354095459 seconds
TIMESTEP 1152 / STATE explore / EPSILON 0.09997832170000419 / ACTION 0 / REWARD 0.1 / Q_MAX  2.17631 / Loss  0.10033531487
loop took 0.26271986961364746 seconds
TIMESTEP 1153 / STATE explore / EPSILON 0.0999782884000042 / ACTION 0 / REWARD -1 / Q_MAX  2.51358 / Loss  0.0637576505542
loop took 0.26879072189331055 seconds
TIMESTEP 1154 / STATE explore / EPSILON 0.0999782551000042 / ACTION 0 / REWARD 0.1 / Q_MAX  2.15201 / Loss  0.0795105099678
loop took 0.2803030014038086 seconds
TIMESTEP 1155 / STATE explor

TIMESTEP 1200 / STATE explore / EPSILON 0.0999767233000045 / ACTION 0 / REWARD -1 / Q_MAX  2.58814 / Loss  0.0365793555975
loop took 0.26570701599121094 seconds
TIMESTEP 1201 / STATE explore / EPSILON 0.0999766900000045 / ACTION 0 / REWARD 0.1 / Q_MAX  2.46488 / Loss  0.0797402262688
loop took 0.268721342086792 seconds
TIMESTEP 1202 / STATE explore / EPSILON 0.09997665670000451 / ACTION 0 / REWARD 0.1 / Q_MAX  2.58497 / Loss  0.0656171664596
loop took 0.2767326831817627 seconds
TIMESTEP 1203 / STATE explore / EPSILON 0.09997662340000452 / ACTION 0 / REWARD 0.1 / Q_MAX  2.98265 / Loss  0.0403229556978
loop took 0.3005075454711914 seconds
TIMESTEP 1204 / STATE explore / EPSILON 0.09997659010000452 / ACTION 0 / REWARD 0.1 / Q_MAX  2.69492 / Loss  0.0614543110132
loop took 0.2841503620147705 seconds
TIMESTEP 1205 / STATE explore / EPSILON 0.09997655680000453 / ACTION 0 / REWARD 0.1 / Q_MAX  2.47668 / Loss  0.0554840862751
loop took 0.26770615577697754 seconds
TIMESTEP 1206 / STATE explore 

TIMESTEP 1251 / STATE explore / EPSILON 0.09997502500000482 / ACTION 0 / REWARD 0.1 / Q_MAX  2.85366 / Loss  0.0358003377914
loop took 0.2848503589630127 seconds
TIMESTEP 1252 / STATE explore / EPSILON 0.09997499170000483 / ACTION 0 / REWARD 0.1 / Q_MAX  1.46869 / Loss  0.162488356233
loop took 0.27974367141723633 seconds
TIMESTEP 1253 / STATE explore / EPSILON 0.09997495840000484 / ACTION 0 / REWARD 0.1 / Q_MAX  2.01765 / Loss  0.0999452769756
loop took 0.30448365211486816 seconds
TIMESTEP 1254 / STATE explore / EPSILON 0.09997492510000484 / ACTION 0 / REWARD 0.1 / Q_MAX  2.15934 / Loss  0.027787938714
loop took 0.29521679878234863 seconds
TIMESTEP 1255 / STATE explore / EPSILON 0.09997489180000485 / ACTION 0 / REWARD 0.1 / Q_MAX  2.07175 / Loss  0.100513473153
loop took 0.30421876907348633 seconds
TIMESTEP 1256 / STATE explore / EPSILON 0.09997485850000486 / ACTION 0 / REWARD 0.1 / Q_MAX  2.12019 / Loss  1.39712071419
loop took 0.2787480354309082 seconds
TIMESTEP 1257 / STATE explore

TIMESTEP 1302 / STATE explore / EPSILON 0.09997332670000515 / ACTION 0 / REWARD 0.1 / Q_MAX  1.79412 / Loss  0.122368820012
loop took 0.27561426162719727 seconds
TIMESTEP 1303 / STATE explore / EPSILON 0.09997329340000516 / ACTION 0 / REWARD 0.1 / Q_MAX  2.19096 / Loss  0.0848150253296
loop took 0.2967953681945801 seconds
TIMESTEP 1304 / STATE explore / EPSILON 0.09997326010000517 / ACTION 0 / REWARD 0.1 / Q_MAX  2.38847 / Loss  0.0780341550708
loop took 0.27168893814086914 seconds
TIMESTEP 1305 / STATE explore / EPSILON 0.09997322680000517 / ACTION 0 / REWARD 0.1 / Q_MAX  3.08009 / Loss  0.126845300198
loop took 0.2682840824127197 seconds
TIMESTEP 1306 / STATE explore / EPSILON 0.09997319350000518 / ACTION 0 / REWARD -1 / Q_MAX  2.34699 / Loss  0.134940847754
loop took 0.2641303539276123 seconds
TIMESTEP 1307 / STATE explore / EPSILON 0.09997316020000518 / ACTION 0 / REWARD 0.1 / Q_MAX  2.08764 / Loss  0.162544071674
loop took 0.2687110900878906 seconds
TIMESTEP 1308 / STATE explore /

TIMESTEP 1353 / STATE explore / EPSILON 0.09997162840000548 / ACTION 0 / REWARD 0.1 / Q_MAX  1.71213 / Loss  0.0298569127917
loop took 0.2957594394683838 seconds
TIMESTEP 1354 / STATE explore / EPSILON 0.09997159510000549 / ACTION 0 / REWARD 0.1 / Q_MAX  1.51293 / Loss  0.0330535918474
loop took 0.28776121139526367 seconds
TIMESTEP 1355 / STATE explore / EPSILON 0.09997156180000549 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0968676 / Loss  0.0737848207355
loop took 0.2941403388977051 seconds
TIMESTEP 1356 / STATE explore / EPSILON 0.0999715285000055 / ACTION 0 / REWARD 0.1 / Q_MAX  1.39256 / Loss  1.20329511166
loop took 0.28733062744140625 seconds
TIMESTEP 1357 / STATE explore / EPSILON 0.0999714952000055 / ACTION 0 / REWARD 0.1 / Q_MAX  7.374 / Loss  0.0387069061399
loop took 0.3098294734954834 seconds
TIMESTEP 1358 / STATE explore / EPSILON 0.09997146190000551 / ACTION 0 / REWARD -1 / Q_MAX  1.02433 / Loss  0.0646388605237
loop took 0.27199673652648926 seconds
TIMESTEP 1359 / STATE explore 

TIMESTEP 1404 / STATE explore / EPSILON 0.09996993010000581 / ACTION 0 / REWARD 0.1 / Q_MAX  2.10014 / Loss  0.192373886704
loop took 0.26134395599365234 seconds
TIMESTEP 1405 / STATE explore / EPSILON 0.09996989680000581 / ACTION 0 / REWARD -1 / Q_MAX  2.15948 / Loss  0.16770093143
loop took 0.2808053493499756 seconds
TIMESTEP 1406 / STATE explore / EPSILON 0.09996986350000582 / ACTION 0 / REWARD 0.1 / Q_MAX  1.97298 / Loss  0.0910413563251
loop took 0.29981446266174316 seconds
TIMESTEP 1407 / STATE explore / EPSILON 0.09996983020000583 / ACTION 0 / REWARD 0.1 / Q_MAX  1.07096 / Loss  0.189732149243
loop took 0.3040473461151123 seconds
TIMESTEP 1408 / STATE explore / EPSILON 0.09996979690000583 / ACTION 0 / REWARD 0.1 / Q_MAX  1.69057 / Loss  0.0746749490499
loop took 0.2687203884124756 seconds
TIMESTEP 1409 / STATE explore / EPSILON 0.09996976360000584 / ACTION 0 / REWARD 0.1 / Q_MAX  1.74653 / Loss  0.0715385377407
loop took 0.2828648090362549 seconds
TIMESTEP 1410 / STATE explore /

TIMESTEP 1455 / STATE explore / EPSILON 0.09996823180000614 / ACTION 0 / REWARD 0.1 / Q_MAX  0.940281 / Loss  0.0637030601501
loop took 0.27056455612182617 seconds
TIMESTEP 1456 / STATE explore / EPSILON 0.09996819850000614 / ACTION 0 / REWARD 0.1 / Q_MAX  2.09036 / Loss  0.0466103740036
loop took 0.27690863609313965 seconds
TIMESTEP 1457 / STATE explore / EPSILON 0.09996816520000615 / ACTION 0 / REWARD 0.1 / Q_MAX  2.34755 / Loss  0.0432769358158
loop took 0.29678773880004883 seconds
TIMESTEP 1458 / STATE explore / EPSILON 0.09996813190000615 / ACTION 0 / REWARD 0.1 / Q_MAX  3.03708 / Loss  0.0573285073042
loop took 0.24328374862670898 seconds
TIMESTEP 1459 / STATE explore / EPSILON 0.09996809860000616 / ACTION 0 / REWARD -1 / Q_MAX  1.75835 / Loss  0.163315802813
loop took 0.2667081356048584 seconds
TIMESTEP 1460 / STATE explore / EPSILON 0.09996806530000617 / ACTION 0 / REWARD 0.1 / Q_MAX  0.460398 / Loss  0.205729827285
----------Random Action----------
loop took 1.0039827823638916

TIMESTEP 1506 / STATE explore / EPSILON 0.09996653350000646 / ACTION 0 / REWARD 0.1 / Q_MAX  1.50825 / Loss  0.0640233010054
loop took 0.28304433822631836 seconds
TIMESTEP 1507 / STATE explore / EPSILON 0.09996650020000647 / ACTION 0 / REWARD -1 / Q_MAX  1.40848 / Loss  0.0332296080887
loop took 0.28378939628601074 seconds
TIMESTEP 1508 / STATE explore / EPSILON 0.09996646690000648 / ACTION 0 / REWARD 0.1 / Q_MAX  0.344554 / Loss  0.0570657886565
loop took 0.28275203704833984 seconds
TIMESTEP 1509 / STATE explore / EPSILON 0.09996643360000648 / ACTION 0 / REWARD 0.1 / Q_MAX  1.01542 / Loss  0.065116725862
loop took 0.294111967086792 seconds
TIMESTEP 1510 / STATE explore / EPSILON 0.09996640030000649 / ACTION 0 / REWARD 0.1 / Q_MAX  1.34508 / Loss  0.0542862266302
loop took 0.3166182041168213 seconds
TIMESTEP 1511 / STATE explore / EPSILON 0.0999663670000065 / ACTION 0 / REWARD 0.1 / Q_MAX  1.04451 / Loss  0.100602075458
loop took 0.2877624034881592 seconds
TIMESTEP 1512 / STATE explore

TIMESTEP 1557 / STATE explore / EPSILON 0.09996483520000679 / ACTION 0 / REWARD 0.1 / Q_MAX  1.69783 / Loss  0.0713883563876
loop took 0.3158409595489502 seconds
TIMESTEP 1558 / STATE explore / EPSILON 0.0999648019000068 / ACTION 0 / REWARD -1 / Q_MAX  1.64523 / Loss  0.024381686002
loop took 0.3148367404937744 seconds
TIMESTEP 1559 / STATE explore / EPSILON 0.0999647686000068 / ACTION 0 / REWARD 0.1 / Q_MAX  1.61561 / Loss  0.0427917540073
loop took 0.3188486099243164 seconds
TIMESTEP 1560 / STATE explore / EPSILON 0.09996473530000681 / ACTION 0 / REWARD 0.1 / Q_MAX  1.60903 / Loss  0.0573198236525
loop took 0.3328855037689209 seconds
TIMESTEP 1561 / STATE explore / EPSILON 0.09996470200000682 / ACTION 0 / REWARD 0.1 / Q_MAX  1.61147 / Loss  0.0775633752346
loop took 0.32692646980285645 seconds
TIMESTEP 1562 / STATE explore / EPSILON 0.09996466870000682 / ACTION 0 / REWARD 0.1 / Q_MAX  1.61902 / Loss  0.0949925929308
loop took 0.2998082637786865 seconds
TIMESTEP 1563 / STATE explore /

TIMESTEP 1608 / STATE explore / EPSILON 0.09996313690000712 / ACTION 0 / REWARD -1 / Q_MAX  1.36286 / Loss  0.0591137856245
loop took 0.2967867851257324 seconds
TIMESTEP 1609 / STATE explore / EPSILON 0.09996310360000712 / ACTION 0 / REWARD 0.1 / Q_MAX  1.79736 / Loss  0.137331008911
loop took 0.28273868560791016 seconds
TIMESTEP 1610 / STATE explore / EPSILON 0.09996307030000713 / ACTION 0 / REWARD 0.1 / Q_MAX  1.78505 / Loss  0.108926787972
loop took 0.32686877250671387 seconds
TIMESTEP 1611 / STATE explore / EPSILON 0.09996303700000714 / ACTION 0 / REWARD 0.1 / Q_MAX  1.87934 / Loss  0.16524720192
loop took 0.2988462448120117 seconds
TIMESTEP 1612 / STATE explore / EPSILON 0.09996300370000714 / ACTION 0 / REWARD 0.1 / Q_MAX  1.88788 / Loss  0.14304921031
loop took 0.2837646007537842 seconds
TIMESTEP 1613 / STATE explore / EPSILON 0.09996297040000715 / ACTION 0 / REWARD 0.1 / Q_MAX  1.63772 / Loss  0.0285067372024
loop took 0.28777217864990234 seconds
TIMESTEP 1614 / STATE explore / 

TIMESTEP 1659 / STATE explore / EPSILON 0.09996143860000745 / ACTION 0 / REWARD -1 / Q_MAX  1.16384 / Loss  0.0366225130856
loop took 0.2838249206542969 seconds
TIMESTEP 1660 / STATE explore / EPSILON 0.09996140530000745 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.388354 / Loss  0.0533873327076
loop took 0.28617191314697266 seconds
TIMESTEP 1661 / STATE explore / EPSILON 0.09996137200000746 / ACTION 0 / REWARD 0.1 / Q_MAX  1.54862 / Loss  0.149956956506
loop took 0.28446245193481445 seconds
TIMESTEP 1662 / STATE explore / EPSILON 0.09996133870000747 / ACTION 0 / REWARD 0.1 / Q_MAX  1.63245 / Loss  0.018649213016
loop took 0.29526209831237793 seconds
TIMESTEP 1663 / STATE explore / EPSILON 0.09996130540000747 / ACTION 0 / REWARD 0.1 / Q_MAX  1.78814 / Loss  0.0768514871597
loop took 0.30294108390808105 seconds
TIMESTEP 1664 / STATE explore / EPSILON 0.09996127210000748 / ACTION 0 / REWARD 0.1 / Q_MAX  1.3779 / Loss  0.0849162489176
loop took 0.28415560722351074 seconds
TIMESTEP 1665 / STATE exp

TIMESTEP 1710 / STATE explore / EPSILON 0.09995974030000777 / ACTION 0 / REWARD 0.1 / Q_MAX  1.76509 / Loss  0.111237235367
loop took 0.2677175998687744 seconds
TIMESTEP 1711 / STATE explore / EPSILON 0.09995970700000778 / ACTION 0 / REWARD 0.1 / Q_MAX  0.085033 / Loss  0.111040003598
loop took 0.27602267265319824 seconds
TIMESTEP 1712 / STATE explore / EPSILON 0.09995967370000779 / ACTION 0 / REWARD 0.1 / Q_MAX  2.14223 / Loss  0.0350587219
loop took 0.2858128547668457 seconds
TIMESTEP 1713 / STATE explore / EPSILON 0.0999596404000078 / ACTION 0 / REWARD 0.1 / Q_MAX  1.62851 / Loss  0.0975933372974
loop took 0.2980031967163086 seconds
TIMESTEP 1714 / STATE explore / EPSILON 0.0999596071000078 / ACTION 0 / REWARD 0.1 / Q_MAX  0.906406 / Loss  0.100276619196
loop took 0.2877652645111084 seconds
TIMESTEP 1715 / STATE explore / EPSILON 0.0999595738000078 / ACTION 0 / REWARD 0.1 / Q_MAX  1.59506 / Loss  0.0385628119111
loop took 0.28165555000305176 seconds
TIMESTEP 1716 / STATE explore / E

loop took 1.0523436069488525 seconds
TIMESTEP 1761 / STATE explore / EPSILON 0.0999580420000081 / ACTION 1 / REWARD -1 / Q_MAX  1.94132 / Loss  0.611483871937
loop took 0.27759480476379395 seconds
TIMESTEP 1762 / STATE explore / EPSILON 0.09995800870000811 / ACTION 0 / REWARD 0.1 / Q_MAX  1.24389 / Loss  0.0483082607388
loop took 0.28173112869262695 seconds
TIMESTEP 1763 / STATE explore / EPSILON 0.09995797540000811 / ACTION 0 / REWARD 0.1 / Q_MAX  1.75627 / Loss  0.102358274162
loop took 0.2800719738006592 seconds
TIMESTEP 1764 / STATE explore / EPSILON 0.09995794210000812 / ACTION 0 / REWARD 0.1 / Q_MAX  1.20774 / Loss  0.0450287312269
loop took 0.2727172374725342 seconds
TIMESTEP 1765 / STATE explore / EPSILON 0.09995790880000813 / ACTION 0 / REWARD 0.1 / Q_MAX  0.484325 / Loss  0.0985668376088
loop took 0.29288387298583984 seconds
TIMESTEP 1766 / STATE explore / EPSILON 0.09995787550000813 / ACTION 0 / REWARD 0.1 / Q_MAX  1.4056 / Loss  0.572559714317
loop took 0.2850837707519531 s

TIMESTEP 1812 / STATE explore / EPSILON 0.09995634370000843 / ACTION 0 / REWARD 0.1 / Q_MAX  1.65489 / Loss  0.0473167784512
loop took 0.2677156925201416 seconds
TIMESTEP 1813 / STATE explore / EPSILON 0.09995631040000844 / ACTION 0 / REWARD 0.1 / Q_MAX  1.13142 / Loss  0.0628211349249
loop took 0.2746541500091553 seconds
TIMESTEP 1814 / STATE explore / EPSILON 0.09995627710000844 / ACTION 0 / REWARD 0.1 / Q_MAX  2.30001 / Loss  0.123801067472
loop took 0.2848076820373535 seconds
TIMESTEP 1815 / STATE explore / EPSILON 0.09995624380000845 / ACTION 0 / REWARD 0.1 / Q_MAX  1.11022 / Loss  1.02110350132
loop took 0.280895471572876 seconds
TIMESTEP 1816 / STATE explore / EPSILON 0.09995621050000845 / ACTION 0 / REWARD 0.1 / Q_MAX  1.21213 / Loss  0.152082636952
loop took 0.2548856735229492 seconds
TIMESTEP 1817 / STATE explore / EPSILON 0.09995617720000846 / ACTION 0 / REWARD -1 / Q_MAX  1.55826 / Loss  0.970846772194
loop took 0.27982163429260254 seconds
TIMESTEP 1818 / STATE explore / EP

TIMESTEP 1863 / STATE explore / EPSILON 0.09995464540000876 / ACTION 0 / REWARD 0.1 / Q_MAX  1.35629 / Loss  0.168406546116
loop took 0.2667098045349121 seconds
TIMESTEP 1864 / STATE explore / EPSILON 0.09995461210000876 / ACTION 0 / REWARD 0.1 / Q_MAX  2.73049 / Loss  0.0791798830032
loop took 0.26767468452453613 seconds
TIMESTEP 1865 / STATE explore / EPSILON 0.09995457880000877 / ACTION 0 / REWARD -1 / Q_MAX  0.837088 / Loss  0.0639436170459
loop took 0.29575133323669434 seconds
TIMESTEP 1866 / STATE explore / EPSILON 0.09995454550000878 / ACTION 0 / REWARD 0.1 / Q_MAX  1.17162 / Loss  0.0824534446001
loop took 0.2955310344696045 seconds
TIMESTEP 1867 / STATE explore / EPSILON 0.09995451220000878 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.159643 / Loss  0.113302856684
loop took 0.2727169990539551 seconds
TIMESTEP 1868 / STATE explore / EPSILON 0.09995447890000879 / ACTION 0 / REWARD 0.1 / Q_MAX  1.10874 / Loss  0.116943344474
loop took 0.267719030380249 seconds
TIMESTEP 1869 / STATE explor

TIMESTEP 1914 / STATE explore / EPSILON 0.09995294710000908 / ACTION 0 / REWARD 0.1 / Q_MAX  1.19142 / Loss  0.084720082581
loop took 0.28269410133361816 seconds
TIMESTEP 1915 / STATE explore / EPSILON 0.09995291380000909 / ACTION 0 / REWARD 0.1 / Q_MAX  1.76898 / Loss  0.0488297790289
loop took 0.27165985107421875 seconds
TIMESTEP 1916 / STATE explore / EPSILON 0.0999528805000091 / ACTION 0 / REWARD 0.1 / Q_MAX  1.22632 / Loss  0.0377681776881
loop took 0.29479217529296875 seconds
TIMESTEP 1917 / STATE explore / EPSILON 0.0999528472000091 / ACTION 0 / REWARD 0.1 / Q_MAX  1.07473 / Loss  0.0459627099335
loop took 0.26609277725219727 seconds
TIMESTEP 1918 / STATE explore / EPSILON 0.09995281390000911 / ACTION 0 / REWARD -1 / Q_MAX  1.54164 / Loss  0.15286026895
loop took 0.2988765239715576 seconds
TIMESTEP 1919 / STATE explore / EPSILON 0.09995278060000912 / ACTION 0 / REWARD 0.1 / Q_MAX  1.18237 / Loss  0.0160035155714
loop took 0.2837967872619629 seconds
TIMESTEP 1920 / STATE explore 

TIMESTEP 1965 / STATE explore / EPSILON 0.09995124880000941 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.28168 / Loss  0.0965753644705
loop took 0.2667078971862793 seconds
TIMESTEP 1966 / STATE explore / EPSILON 0.09995121550000942 / ACTION 0 / REWARD 0.1 / Q_MAX  1.00032 / Loss  0.142629951239
loop took 0.2675020694732666 seconds
TIMESTEP 1967 / STATE explore / EPSILON 0.09995118220000943 / ACTION 0 / REWARD 0.1 / Q_MAX  1.05751 / Loss  0.0407141186297
loop took 0.281757116317749 seconds
TIMESTEP 1968 / STATE explore / EPSILON 0.09995114890000943 / ACTION 0 / REWARD 0.1 / Q_MAX  0.87573 / Loss  0.0622756108642
loop took 0.266711950302124 seconds
TIMESTEP 1969 / STATE explore / EPSILON 0.09995111560000944 / ACTION 0 / REWARD 0.1 / Q_MAX  1.02125 / Loss  0.0487275980413
loop took 0.2825915813446045 seconds
TIMESTEP 1970 / STATE explore / EPSILON 0.09995108230000944 / ACTION 0 / REWARD 0.1 / Q_MAX  0.841773 / Loss  0.0361591689289
loop took 0.281752347946167 seconds
TIMESTEP 1971 / STATE explore 

TIMESTEP 2016 / STATE explore / EPSILON 0.09994955050000974 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0944887 / Loss  0.0686081945896
loop took 0.29732656478881836 seconds
TIMESTEP 2017 / STATE explore / EPSILON 0.09994951720000975 / ACTION 0 / REWARD 0.1 / Q_MAX  0.838589 / Loss  0.0516965277493
loop took 0.3048744201660156 seconds
TIMESTEP 2018 / STATE explore / EPSILON 0.09994948390000975 / ACTION 0 / REWARD 0.1 / Q_MAX  0.732067 / Loss  0.0306965373456
loop took 0.29537010192871094 seconds
TIMESTEP 2019 / STATE explore / EPSILON 0.09994945060000976 / ACTION 0 / REWARD -1 / Q_MAX  0.354261 / Loss  0.0503657758236
loop took 0.28376030921936035 seconds
TIMESTEP 2020 / STATE explore / EPSILON 0.09994941730000977 / ACTION 0 / REWARD 0.1 / Q_MAX  1.00147 / Loss  0.0697423741221
loop took 0.26670074462890625 seconds
TIMESTEP 2021 / STATE explore / EPSILON 0.09994938400000977 / ACTION 0 / REWARD 0.1 / Q_MAX  0.488355 / Loss  0.0374069958925
loop took 0.2677185535430908 seconds
TIMESTEP 2022 / ST

TIMESTEP 2067 / STATE explore / EPSILON 0.09994785220001007 / ACTION 0 / REWARD 0.1 / Q_MAX  0.968219 / Loss  0.027469502762
loop took 0.2514655590057373 seconds
TIMESTEP 2068 / STATE explore / EPSILON 0.09994781890001007 / ACTION 0 / REWARD -1 / Q_MAX  2.06284 / Loss  0.0870665162802
loop took 0.292675256729126 seconds
TIMESTEP 2069 / STATE explore / EPSILON 0.09994778560001008 / ACTION 0 / REWARD 0.1 / Q_MAX  0.575897 / Loss  0.583914101124
loop took 0.33095216751098633 seconds
TIMESTEP 2070 / STATE explore / EPSILON 0.09994775230001009 / ACTION 0 / REWARD 0.1 / Q_MAX  0.752708 / Loss  0.016877764836
loop took 0.27472496032714844 seconds
TIMESTEP 2071 / STATE explore / EPSILON 0.0999477190000101 / ACTION 0 / REWARD 0.1 / Q_MAX  1.77388 / Loss  0.0681256353855
loop took 0.2807273864746094 seconds
TIMESTEP 2072 / STATE explore / EPSILON 0.0999476857000101 / ACTION 0 / REWARD 0.1 / Q_MAX  1.0918 / Loss  0.0323649384081
loop took 0.2848026752471924 seconds
TIMESTEP 2073 / STATE explore /

TIMESTEP 2118 / STATE explore / EPSILON 0.0999461539000104 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0672159 / Loss  0.0381693169475
loop took 0.28174448013305664 seconds
TIMESTEP 2119 / STATE explore / EPSILON 0.0999461206000104 / ACTION 0 / REWARD -1 / Q_MAX  0.891039 / Loss  0.102034948766
loop took 0.29278135299682617 seconds
TIMESTEP 2120 / STATE explore / EPSILON 0.09994608730001041 / ACTION 0 / REWARD 0.1 / Q_MAX  0.330447 / Loss  0.0405244119465
loop took 0.289139986038208 seconds
TIMESTEP 2121 / STATE explore / EPSILON 0.09994605400001041 / ACTION 0 / REWARD 0.1 / Q_MAX  0.997511 / Loss  0.0652140825987
loop took 0.29277849197387695 seconds
TIMESTEP 2122 / STATE explore / EPSILON 0.09994602070001042 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0892734 / Loss  0.0374190881848
loop took 0.28977251052856445 seconds
TIMESTEP 2123 / STATE explore / EPSILON 0.09994598740001043 / ACTION 0 / REWARD 0.1 / Q_MAX  2.15619 / Loss  0.0251288600266
loop took 0.29879021644592285 seconds
TIMESTEP 2124 / STATE

TIMESTEP 2169 / STATE explore / EPSILON 0.09994445560001072 / ACTION 0 / REWARD 0.1 / Q_MAX  1.82509 / Loss  0.135848775506
loop took 0.2846493721008301 seconds
TIMESTEP 2170 / STATE explore / EPSILON 0.09994442230001073 / ACTION 0 / REWARD 0.1 / Q_MAX  0.955464 / Loss  0.608986973763
----------Random Action----------
loop took 0.296215295791626 seconds
TIMESTEP 2171 / STATE explore / EPSILON 0.09994438900001074 / ACTION 0 / REWARD 0.1 / Q_MAX  0.984758 / Loss  0.0292762443423
loop took 0.30231308937072754 seconds
TIMESTEP 2172 / STATE explore / EPSILON 0.09994435570001074 / ACTION 0 / REWARD -1 / Q_MAX  1.15336 / Loss  0.0508539304137
loop took 0.26847219467163086 seconds
TIMESTEP 2173 / STATE explore / EPSILON 0.09994432240001075 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.138058 / Loss  0.0369936227798
loop took 0.2677910327911377 seconds
TIMESTEP 2174 / STATE explore / EPSILON 0.09994428910001076 / ACTION 0 / REWARD 0.1 / Q_MAX  0.129794 / Loss  0.538446784019
loop took 0.2649238109588623 

TIMESTEP 2220 / STATE explore / EPSILON 0.09994275730001105 / ACTION 0 / REWARD 0.1 / Q_MAX  0.23678 / Loss  0.0665718689561
loop took 1.0140471458435059 seconds
TIMESTEP 2221 / STATE explore / EPSILON 0.09994272400001106 / ACTION 1 / REWARD -1 / Q_MAX  -0.150625 / Loss  0.0866744592786
loop took 0.2606220245361328 seconds
TIMESTEP 2222 / STATE explore / EPSILON 0.09994269070001106 / ACTION 0 / REWARD 0.1 / Q_MAX  0.211803 / Loss  0.0435889177024
loop took 0.2627089023590088 seconds
TIMESTEP 2223 / STATE explore / EPSILON 0.09994265740001107 / ACTION 0 / REWARD 0.1 / Q_MAX  0.692827 / Loss  0.0424793437123
loop took 0.2647249698638916 seconds
TIMESTEP 2224 / STATE explore / EPSILON 0.09994262410001108 / ACTION 0 / REWARD 0.1 / Q_MAX  0.211158 / Loss  0.0446174889803
loop took 0.2687089443206787 seconds
TIMESTEP 2225 / STATE explore / EPSILON 0.09994259080001108 / ACTION 0 / REWARD 0.1 / Q_MAX  0.212543 / Loss  0.0645494014025
loop took 0.26671266555786133 seconds
TIMESTEP 2226 / STATE 

TIMESTEP 2271 / STATE explore / EPSILON 0.09994105900001138 / ACTION 0 / REWARD 0.1 / Q_MAX  1.10145 / Loss  0.100067652762
loop took 0.2727222442626953 seconds
TIMESTEP 2272 / STATE explore / EPSILON 0.09994102570001138 / ACTION 0 / REWARD 0.1 / Q_MAX  1.09555 / Loss  0.0395181104541
loop took 0.29755258560180664 seconds
TIMESTEP 2273 / STATE explore / EPSILON 0.09994099240001139 / ACTION 0 / REWARD -1 / Q_MAX  1.06913 / Loss  0.0919341593981
loop took 0.29413270950317383 seconds
TIMESTEP 2274 / STATE explore / EPSILON 0.0999409591000114 / ACTION 0 / REWARD 0.1 / Q_MAX  1.21187 / Loss  0.0571291595697
loop took 0.2737257480621338 seconds
TIMESTEP 2275 / STATE explore / EPSILON 0.0999409258000114 / ACTION 0 / REWARD 0.1 / Q_MAX  1.04719 / Loss  0.0544775091112
loop took 0.26771068572998047 seconds
TIMESTEP 2276 / STATE explore / EPSILON 0.09994089250001141 / ACTION 0 / REWARD 0.1 / Q_MAX  1.04643 / Loss  0.0668119639158
loop took 0.2907826900482178 seconds
TIMESTEP 2277 / STATE explore

TIMESTEP 2322 / STATE explore / EPSILON 0.0999393607000117 / ACTION 0 / REWARD 0.1 / Q_MAX  0.89398 / Loss  0.0231480225921
loop took 0.2877671718597412 seconds
TIMESTEP 2323 / STATE explore / EPSILON 0.09993932740001171 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.729397 / Loss  0.010596210137
loop took 0.27171826362609863 seconds
TIMESTEP 2324 / STATE explore / EPSILON 0.09993929410001172 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.235035 / Loss  0.0202620327473
loop took 0.27878451347351074 seconds
TIMESTEP 2325 / STATE explore / EPSILON 0.09993926080001173 / ACTION 0 / REWARD 0.1 / Q_MAX  1.02484 / Loss  0.0200988911092
loop took 0.2838449478149414 seconds
TIMESTEP 2326 / STATE explore / EPSILON 0.09993922750001173 / ACTION 0 / REWARD 0.1 / Q_MAX  1.02711 / Loss  0.0652256458998
loop took 0.28282928466796875 seconds
TIMESTEP 2327 / STATE explore / EPSILON 0.09993919420001174 / ACTION 0 / REWARD 0.1 / Q_MAX  1.06875 / Loss  0.0396559163928
loop took 0.2867424488067627 seconds
TIMESTEP 2328 / STATE e

TIMESTEP 2373 / STATE explore / EPSILON 0.09993766240001203 / ACTION 0 / REWARD 0.1 / Q_MAX  1.11176 / Loss  0.036913190037
loop took 0.2677292823791504 seconds
TIMESTEP 2374 / STATE explore / EPSILON 0.09993762910001204 / ACTION 0 / REWARD 0.1 / Q_MAX  0.885317 / Loss  0.0467371866107
loop took 0.2798194885253906 seconds
TIMESTEP 2375 / STATE explore / EPSILON 0.09993759580001205 / ACTION 0 / REWARD 0.1 / Q_MAX  0.924345 / Loss  0.0461242720485
loop took 0.28466224670410156 seconds
TIMESTEP 2376 / STATE explore / EPSILON 0.09993756250001205 / ACTION 0 / REWARD 0.1 / Q_MAX  1.1289 / Loss  0.041184566915
loop took 0.2677159309387207 seconds
TIMESTEP 2377 / STATE explore / EPSILON 0.09993752920001206 / ACTION 0 / REWARD 0.1 / Q_MAX  0.612306 / Loss  0.0689581781626
loop took 0.2637012004852295 seconds
TIMESTEP 2378 / STATE explore / EPSILON 0.09993749590001207 / ACTION 0 / REWARD -1 / Q_MAX  0.968035 / Loss  0.0263226889074
loop took 0.28075218200683594 seconds
TIMESTEP 2379 / STATE expl

TIMESTEP 2424 / STATE explore / EPSILON 0.09993596410001236 / ACTION 0 / REWARD 0.1 / Q_MAX  0.749988 / Loss  0.0418521612883
loop took 0.2817494869232178 seconds
TIMESTEP 2425 / STATE explore / EPSILON 0.09993593080001237 / ACTION 0 / REWARD 0.1 / Q_MAX  0.622613 / Loss  0.0373522303998
loop took 0.2747342586517334 seconds
TIMESTEP 2426 / STATE explore / EPSILON 0.09993589750001237 / ACTION 0 / REWARD 0.1 / Q_MAX  0.572336 / Loss  0.084033370018
loop took 0.2656996250152588 seconds
TIMESTEP 2427 / STATE explore / EPSILON 0.09993586420001238 / ACTION 0 / REWARD 0.1 / Q_MAX  0.198644 / Loss  0.0352062880993
loop took 0.2647080421447754 seconds
TIMESTEP 2428 / STATE explore / EPSILON 0.09993583090001239 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.337913 / Loss  0.0386301763356
loop took 0.25095176696777344 seconds
TIMESTEP 2429 / STATE explore / EPSILON 0.0999357976000124 / ACTION 0 / REWARD -1 / Q_MAX  -0.354851 / Loss  0.0660153925419
loop took 0.26579999923706055 seconds
TIMESTEP 2430 / STATE

TIMESTEP 2475 / STATE explore / EPSILON 0.09993426580001269 / ACTION 0 / REWARD 0.1 / Q_MAX  0.30867 / Loss  0.164667755365
loop took 0.30701684951782227 seconds
TIMESTEP 2476 / STATE explore / EPSILON 0.0999342325000127 / ACTION 0 / REWARD -1 / Q_MAX  0.466274 / Loss  0.0325850881636
loop took 0.35272955894470215 seconds
TIMESTEP 2477 / STATE explore / EPSILON 0.0999341992000127 / ACTION 0 / REWARD 0.1 / Q_MAX  0.211439 / Loss  0.0239901356399
loop took 0.2834293842315674 seconds
TIMESTEP 2478 / STATE explore / EPSILON 0.09993416590001271 / ACTION 0 / REWARD 0.1 / Q_MAX  4.94143 / Loss  0.5756508708
loop took 0.2833249568939209 seconds
TIMESTEP 2479 / STATE explore / EPSILON 0.09993413260001272 / ACTION 0 / REWARD 0.1 / Q_MAX  0.323472 / Loss  0.051090143621
loop took 0.307863712310791 seconds
TIMESTEP 2480 / STATE explore / EPSILON 0.09993409930001272 / ACTION 0 / REWARD 0.1 / Q_MAX  0.256232 / Loss  0.0603803098202
loop took 1.0116510391235352 seconds
TIMESTEP 2481 / STATE explore /

TIMESTEP 2526 / STATE explore / EPSILON 0.09993256750001302 / ACTION 0 / REWARD -1 / Q_MAX  0.110261 / Loss  0.028338521719
loop took 0.28376078605651855 seconds
TIMESTEP 2527 / STATE explore / EPSILON 0.09993253420001302 / ACTION 0 / REWARD 0.1 / Q_MAX  0.903631 / Loss  0.0235729590058
loop took 0.29578447341918945 seconds
TIMESTEP 2528 / STATE explore / EPSILON 0.09993250090001303 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.299917 / Loss  0.0480124726892
loop took 0.2958853244781494 seconds
TIMESTEP 2529 / STATE explore / EPSILON 0.09993246760001304 / ACTION 0 / REWARD 0.1 / Q_MAX  0.365476 / Loss  0.0405181199312
loop took 0.2731175422668457 seconds
TIMESTEP 2530 / STATE explore / EPSILON 0.09993243430001304 / ACTION 0 / REWARD 0.1 / Q_MAX  0.343832 / Loss  0.055646751076
loop took 0.2957899570465088 seconds
TIMESTEP 2531 / STATE explore / EPSILON 0.09993240100001305 / ACTION 0 / REWARD 0.1 / Q_MAX  0.327495 / Loss  0.0162191223353
loop took 0.268709659576416 seconds
TIMESTEP 2532 / STATE e

TIMESTEP 2577 / STATE explore / EPSILON 0.09993086920001334 / ACTION 0 / REWARD 0.1 / Q_MAX  0.61489 / Loss  0.0468416437507
loop took 0.2932283878326416 seconds
TIMESTEP 2578 / STATE explore / EPSILON 0.09993083590001335 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.662991 / Loss  0.0495384372771
loop took 0.29001331329345703 seconds
TIMESTEP 2579 / STATE explore / EPSILON 0.09993080260001336 / ACTION 0 / REWARD 0.1 / Q_MAX  0.746627 / Loss  0.0483972877264
loop took 0.2827470302581787 seconds
TIMESTEP 2580 / STATE explore / EPSILON 0.09993076930001336 / ACTION 0 / REWARD 0.1 / Q_MAX  0.446414 / Loss  0.0202513020486
loop took 1.0280473232269287 seconds
TIMESTEP 2581 / STATE explore / EPSILON 0.09993073600001337 / ACTION 1 / REWARD -1 / Q_MAX  0.696371 / Loss  0.0346371680498
loop took 0.2705724239349365 seconds
TIMESTEP 2582 / STATE explore / EPSILON 0.09993070270001338 / ACTION 0 / REWARD 0.1 / Q_MAX  0.448628 / Loss  0.0208213403821
loop took 0.26473474502563477 seconds
TIMESTEP 2583 / STATE

TIMESTEP 2628 / STATE explore / EPSILON 0.09992917090001367 / ACTION 0 / REWARD 0.1 / Q_MAX  0.524322 / Loss  0.0428012013435
loop took 0.30771541595458984 seconds
TIMESTEP 2629 / STATE explore / EPSILON 0.09992913760001368 / ACTION 0 / REWARD -1 / Q_MAX  0.726066 / Loss  0.0551698580384
loop took 0.2767353057861328 seconds
TIMESTEP 2630 / STATE explore / EPSILON 0.09992910430001369 / ACTION 0 / REWARD 0.1 / Q_MAX  0.41253 / Loss  0.0161131806672
loop took 0.29277777671813965 seconds
TIMESTEP 2631 / STATE explore / EPSILON 0.09992907100001369 / ACTION 0 / REWARD 0.1 / Q_MAX  0.650089 / Loss  0.0456769093871
loop took 0.29991984367370605 seconds
TIMESTEP 2632 / STATE explore / EPSILON 0.0999290377000137 / ACTION 0 / REWARD 0.1 / Q_MAX  0.686193 / Loss  0.0496927276254
loop took 0.27171850204467773 seconds
TIMESTEP 2633 / STATE explore / EPSILON 0.0999290044000137 / ACTION 0 / REWARD 0.1 / Q_MAX  0.88546 / Loss  0.202957004309
loop took 0.2947883605957031 seconds
TIMESTEP 2634 / STATE ex

TIMESTEP 2679 / STATE explore / EPSILON 0.099927472600014 / ACTION 0 / REWARD 0.1 / Q_MAX  1.10476 / Loss  0.0365705788136
loop took 0.28180670738220215 seconds
TIMESTEP 2680 / STATE explore / EPSILON 0.099927439300014 / ACTION 0 / REWARD 0.1 / Q_MAX  0.278253 / Loss  0.0280935429037
----------Random Action----------
loop took 1.0232524871826172 seconds
TIMESTEP 2681 / STATE explore / EPSILON 0.09992740600001401 / ACTION 1 / REWARD 0.1 / Q_MAX  0.189772 / Loss  0.0170437078923
loop took 0.27903199195861816 seconds
TIMESTEP 2682 / STATE explore / EPSILON 0.09992737270001402 / ACTION 0 / REWARD 0.1 / Q_MAX  0.678409 / Loss  0.0200867597014
loop took 0.2696950435638428 seconds
TIMESTEP 2683 / STATE explore / EPSILON 0.09992733940001403 / ACTION 0 / REWARD -1 / Q_MAX  0.557414 / Loss  0.0297186262906
loop took 0.3008542060852051 seconds
TIMESTEP 2684 / STATE explore / EPSILON 0.09992730610001403 / ACTION 0 / REWARD 0.1 / Q_MAX  0.409642 / Loss  0.0172344911844
loop took 0.30954885482788086

TIMESTEP 2730 / STATE explore / EPSILON 0.09992577430001433 / ACTION 0 / REWARD 0.1 / Q_MAX  0.24616 / Loss  0.0938844233751
loop took 0.3139221668243408 seconds
TIMESTEP 2731 / STATE explore / EPSILON 0.09992574100001433 / ACTION 0 / REWARD 0.1 / Q_MAX  0.271736 / Loss  0.0435840971768
loop took 0.2867581844329834 seconds
TIMESTEP 2732 / STATE explore / EPSILON 0.09992570770001434 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.536719 / Loss  0.0113222319633
loop took 0.31036376953125 seconds
TIMESTEP 2733 / STATE explore / EPSILON 0.09992567440001435 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0497663 / Loss  0.055899027735
loop took 0.3038046360015869 seconds
TIMESTEP 2734 / STATE explore / EPSILON 0.09992564110001435 / ACTION 0 / REWARD 0.1 / Q_MAX  4.27902 / Loss  0.400158345699
loop took 0.2808234691619873 seconds
TIMESTEP 2735 / STATE explore / EPSILON 0.09992560780001436 / ACTION 0 / REWARD -1 / Q_MAX  4.23874 / Loss  0.0434409789741
loop took 0.283782958984375 seconds
TIMESTEP 2736 / STATE explore

TIMESTEP 2781 / STATE explore / EPSILON 0.09992407600001466 / ACTION 0 / REWARD 0.1 / Q_MAX  0.676498 / Loss  0.0375981852412
loop took 0.31984496116638184 seconds
TIMESTEP 2782 / STATE explore / EPSILON 0.09992404270001466 / ACTION 0 / REWARD 0.1 / Q_MAX  2.02663 / Loss  0.0392658561468
loop took 0.32085347175598145 seconds
TIMESTEP 2783 / STATE explore / EPSILON 0.09992400940001467 / ACTION 0 / REWARD 0.1 / Q_MAX  0.676966 / Loss  0.0448705665767
loop took 0.3298780918121338 seconds
TIMESTEP 2784 / STATE explore / EPSILON 0.09992397610001467 / ACTION 0 / REWARD 0.1 / Q_MAX  0.678755 / Loss  0.0366503037512
loop took 0.3159341812133789 seconds
TIMESTEP 2785 / STATE explore / EPSILON 0.09992394280001468 / ACTION 0 / REWARD -1 / Q_MAX  0.670213 / Loss  0.0576770864427
loop took 0.3358931541442871 seconds
TIMESTEP 2786 / STATE explore / EPSILON 0.09992390950001469 / ACTION 0 / REWARD 0.1 / Q_MAX  1.28529 / Loss  0.0107137169689
loop took 0.3318827152252197 seconds
TIMESTEP 2787 / STATE e

TIMESTEP 2832 / STATE explore / EPSILON 0.09992237770001498 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.239969 / Loss  0.0634735971689
loop took 0.3048110008239746 seconds
TIMESTEP 2833 / STATE explore / EPSILON 0.09992234440001499 / ACTION 0 / REWARD 0.1 / Q_MAX  0.838648 / Loss  0.0612235330045
loop took 0.28575968742370605 seconds
TIMESTEP 2834 / STATE explore / EPSILON 0.099922311100015 / ACTION 0 / REWARD 0.1 / Q_MAX  0.880271 / Loss  0.0466115176678
loop took 0.2907733917236328 seconds
TIMESTEP 2835 / STATE explore / EPSILON 0.099922277800015 / ACTION 0 / REWARD 0.1 / Q_MAX  0.861873 / Loss  0.0650152266026
loop took 0.29982757568359375 seconds
TIMESTEP 2836 / STATE explore / EPSILON 0.09992224450001501 / ACTION 0 / REWARD 0.1 / Q_MAX  0.837614 / Loss  0.0288678016514
loop took 0.2863762378692627 seconds
TIMESTEP 2837 / STATE explore / EPSILON 0.09992221120001502 / ACTION 0 / REWARD 0.1 / Q_MAX  0.155813 / Loss  0.043818872422
loop took 0.30381345748901367 seconds
TIMESTEP 2838 / STATE e

TIMESTEP 2883 / STATE explore / EPSILON 0.09992067940001531 / ACTION 0 / REWARD 0.1 / Q_MAX  0.748077 / Loss  0.0263470765203
loop took 0.28242993354797363 seconds
TIMESTEP 2884 / STATE explore / EPSILON 0.09992064610001532 / ACTION 0 / REWARD -1 / Q_MAX  0.360379 / Loss  0.0275587085634
loop took 0.29686617851257324 seconds
TIMESTEP 2885 / STATE explore / EPSILON 0.09992061280001532 / ACTION 0 / REWARD 0.1 / Q_MAX  0.790649 / Loss  0.0506758540869
loop took 0.2840614318847656 seconds
TIMESTEP 2886 / STATE explore / EPSILON 0.09992057950001533 / ACTION 0 / REWARD 0.1 / Q_MAX  0.387569 / Loss  0.395798116922
loop took 0.2857654094696045 seconds
TIMESTEP 2887 / STATE explore / EPSILON 0.09992054620001534 / ACTION 0 / REWARD 0.1 / Q_MAX  0.207343 / Loss  0.042500000447
loop took 0.2977886199951172 seconds
TIMESTEP 2888 / STATE explore / EPSILON 0.09992051290001534 / ACTION 0 / REWARD 0.1 / Q_MAX  0.391647 / Loss  0.0365105420351
loop took 0.29277896881103516 seconds
TIMESTEP 2889 / STATE 

TIMESTEP 2934 / STATE explore / EPSILON 0.09991898110001564 / ACTION 0 / REWARD -1 / Q_MAX  0.164418 / Loss  0.0164506081492
loop took 0.2857651710510254 seconds
TIMESTEP 2935 / STATE explore / EPSILON 0.09991894780001565 / ACTION 0 / REWARD 0.1 / Q_MAX  1.08441 / Loss  0.0450262911618
loop took 0.2837531566619873 seconds
TIMESTEP 2936 / STATE explore / EPSILON 0.09991891450001565 / ACTION 0 / REWARD 0.1 / Q_MAX  0.235638 / Loss  0.0195526666939
loop took 0.2797408103942871 seconds
TIMESTEP 2937 / STATE explore / EPSILON 0.09991888120001566 / ACTION 0 / REWARD 0.1 / Q_MAX  0.84229 / Loss  0.0325863473117
loop took 0.28375935554504395 seconds
TIMESTEP 2938 / STATE explore / EPSILON 0.09991884790001566 / ACTION 0 / REWARD 0.1 / Q_MAX  0.210492 / Loss  0.0520258769393
loop took 0.31784987449645996 seconds
TIMESTEP 2939 / STATE explore / EPSILON 0.09991881460001567 / ACTION 0 / REWARD 0.1 / Q_MAX  0.247306 / Loss  0.0380569882691
loop took 0.2817540168762207 seconds
TIMESTEP 2940 / STATE e

TIMESTEP 2985 / STATE explore / EPSILON 0.09991728280001597 / ACTION 0 / REWARD -1 / Q_MAX  -0.611581 / Loss  0.0305613316596
loop took 0.28379249572753906 seconds
TIMESTEP 2986 / STATE explore / EPSILON 0.09991724950001597 / ACTION 0 / REWARD 0.1 / Q_MAX  1.04653 / Loss  0.0204462539405
loop took 0.2827458381652832 seconds
TIMESTEP 2987 / STATE explore / EPSILON 0.09991721620001598 / ACTION 0 / REWARD 0.1 / Q_MAX  0.827336 / Loss  0.0502195134759
loop took 0.29384350776672363 seconds
TIMESTEP 2988 / STATE explore / EPSILON 0.09991718290001599 / ACTION 0 / REWARD 0.1 / Q_MAX  0.762682 / Loss  0.0137262353674
loop took 0.31714844703674316 seconds
TIMESTEP 2989 / STATE explore / EPSILON 0.09991714960001599 / ACTION 0 / REWARD 0.1 / Q_MAX  0.318367 / Loss  0.0342964753509
loop took 0.27071642875671387 seconds
TIMESTEP 2990 / STATE explore / EPSILON 0.099917116300016 / ACTION 0 / REWARD 0.1 / Q_MAX  0.50902 / Loss  0.0909490138292
loop took 0.2949252128601074 seconds
TIMESTEP 2991 / STATE 

TIMESTEP 3036 / STATE explore / EPSILON 0.0999155845000163 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.261828 / Loss  0.0196625031531
loop took 0.28177738189697266 seconds
TIMESTEP 3037 / STATE explore / EPSILON 0.0999155512000163 / ACTION 0 / REWARD 0.1 / Q_MAX  0.290879 / Loss  0.0327560044825
loop took 0.28475356101989746 seconds
TIMESTEP 3038 / STATE explore / EPSILON 0.0999155179000163 / ACTION 0 / REWARD 0.1 / Q_MAX  0.912576 / Loss  0.0399792864919
loop took 0.28275227546691895 seconds
TIMESTEP 3039 / STATE explore / EPSILON 0.09991548460001631 / ACTION 0 / REWARD 0.1 / Q_MAX  0.264732 / Loss  0.0387420579791
loop took 0.3890352249145508 seconds
TIMESTEP 3040 / STATE explore / EPSILON 0.09991545130001632 / ACTION 0 / REWARD 0.1 / Q_MAX  0.255607 / Loss  0.0359470807016
loop took 1.084742784500122 seconds
TIMESTEP 3041 / STATE explore / EPSILON 0.09991541800001633 / ACTION 1 / REWARD -1 / Q_MAX  -1.02776 / Loss  0.0911838784814
loop took 0.27362799644470215 seconds
TIMESTEP 3042 / STATE 

TIMESTEP 3087 / STATE explore / EPSILON 0.09991388620001662 / ACTION 0 / REWARD 0.1 / Q_MAX  1.27448 / Loss  0.0211932267994
loop took 0.2887592315673828 seconds
TIMESTEP 3088 / STATE explore / EPSILON 0.09991385290001663 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.185996 / Loss  0.0916618406773
loop took 0.28275322914123535 seconds
TIMESTEP 3089 / STATE explore / EPSILON 0.09991381960001663 / ACTION 0 / REWARD 0.1 / Q_MAX  0.471901 / Loss  0.0336032137275
loop took 0.28375673294067383 seconds
TIMESTEP 3090 / STATE explore / EPSILON 0.09991378630001664 / ACTION 0 / REWARD 0.1 / Q_MAX  0.465893 / Loss  0.0397726222873
loop took 0.2657637596130371 seconds
TIMESTEP 3091 / STATE explore / EPSILON 0.09991375300001665 / ACTION 0 / REWARD -1 / Q_MAX  0.60447 / Loss  0.0681855976582
loop took 0.2827489376068115 seconds
TIMESTEP 3092 / STATE explore / EPSILON 0.09991371970001665 / ACTION 0 / REWARD 0.1 / Q_MAX  0.375026 / Loss  0.171447977424
loop took 0.2996799945831299 seconds
TIMESTEP 3093 / STATE e

TIMESTEP 3138 / STATE explore / EPSILON 0.09991218790001695 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.497992 / Loss  0.00742549961433
loop took 0.2993168830871582 seconds
TIMESTEP 3139 / STATE explore / EPSILON 0.09991215460001696 / ACTION 0 / REWARD 0.1 / Q_MAX  0.571837 / Loss  0.0309509783983
loop took 0.31444263458251953 seconds
TIMESTEP 3140 / STATE explore / EPSILON 0.09991212130001696 / ACTION 0 / REWARD 0.1 / Q_MAX  0.313104 / Loss  0.278210639954
----------Random Action----------
loop took 1.0266335010528564 seconds
TIMESTEP 3141 / STATE explore / EPSILON 0.09991208800001697 / ACTION 1 / REWARD 0.1 / Q_MAX  4.24303 / Loss  0.0580917783082
loop took 0.28465962409973145 seconds
TIMESTEP 3142 / STATE explore / EPSILON 0.09991205470001698 / ACTION 0 / REWARD 0.1 / Q_MAX  0.522677 / Loss  0.0496885329485
loop took 0.2857472896575928 seconds
TIMESTEP 3143 / STATE explore / EPSILON 0.09991202140001698 / ACTION 0 / REWARD -1 / Q_MAX  0.527759 / Loss  0.0343027077615
loop took 0.268850564956

TIMESTEP 3188 / STATE explore / EPSILON 0.09991052290001727 / ACTION 0 / REWARD 0.1 / Q_MAX  0.602732 / Loss  0.0489337742329
loop took 0.28374814987182617 seconds
TIMESTEP 3189 / STATE explore / EPSILON 0.09991048960001728 / ACTION 0 / REWARD 0.1 / Q_MAX  0.203122 / Loss  0.0747192427516
loop took 0.28679347038269043 seconds
TIMESTEP 3190 / STATE explore / EPSILON 0.09991045630001728 / ACTION 0 / REWARD -1 / Q_MAX  0.345403 / Loss  0.190494656563
loop took 1.009566068649292 seconds
TIMESTEP 3191 / STATE explore / EPSILON 0.09991042300001729 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.545419 / Loss  0.0598576031625
loop took 0.2765958309173584 seconds
TIMESTEP 3192 / STATE explore / EPSILON 0.0999103897000173 / ACTION 0 / REWARD 0.1 / Q_MAX  0.141485 / Loss  0.0430647358298
loop took 0.27974510192871094 seconds
TIMESTEP 3193 / STATE explore / EPSILON 0.0999103564000173 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.147293 / Loss  0.0200446732342
loop took 0.3088259696960449 seconds
TIMESTEP 3194 / STATE 

TIMESTEP 3239 / STATE explore / EPSILON 0.0999088246000176 / ACTION 0 / REWARD 0.1 / Q_MAX  0.45812 / Loss  0.0294161494821
loop took 0.2847599983215332 seconds
TIMESTEP 3240 / STATE explore / EPSILON 0.0999087913000176 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0979053 / Loss  0.0291198659688
loop took 0.2847599983215332 seconds
TIMESTEP 3241 / STATE explore / EPSILON 0.09990875800001761 / ACTION 0 / REWARD 0.1 / Q_MAX  0.217202 / Loss  0.0203720275313
loop took 0.2828860282897949 seconds
TIMESTEP 3242 / STATE explore / EPSILON 0.09990872470001762 / ACTION 0 / REWARD 0.1 / Q_MAX  0.263421 / Loss  0.0458278283477
loop took 0.28275275230407715 seconds
TIMESTEP 3243 / STATE explore / EPSILON 0.09990869140001762 / ACTION 0 / REWARD 0.1 / Q_MAX  0.464456 / Loss  0.0255859084427
loop took 0.28275203704833984 seconds
TIMESTEP 3244 / STATE explore / EPSILON 0.09990865810001763 / ACTION 0 / REWARD 0.1 / Q_MAX  0.543369 / Loss  0.0300457812846
loop took 0.2825312614440918 seconds
TIMESTEP 3245 / STATE 

TIMESTEP 3290 / STATE explore / EPSILON 0.09990712630001793 / ACTION 0 / REWARD 0.1 / Q_MAX  0.726024 / Loss  0.0593637079
loop took 0.315842866897583 seconds
TIMESTEP 3291 / STATE explore / EPSILON 0.09990709300001793 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.942408 / Loss  0.0526403039694
loop took 0.28600072860717773 seconds
TIMESTEP 3292 / STATE explore / EPSILON 0.09990705970001794 / ACTION 0 / REWARD -1 / Q_MAX  0.293982 / Loss  0.0614124238491
loop took 0.3157520294189453 seconds
TIMESTEP 3293 / STATE explore / EPSILON 0.09990702640001795 / ACTION 0 / REWARD 0.1 / Q_MAX  0.795143 / Loss  0.0265083815902
loop took 0.3164646625518799 seconds
TIMESTEP 3294 / STATE explore / EPSILON 0.09990699310001795 / ACTION 0 / REWARD 0.1 / Q_MAX  1.48511 / Loss  0.0542132407427
loop took 0.28575921058654785 seconds
TIMESTEP 3295 / STATE explore / EPSILON 0.09990695980001796 / ACTION 0 / REWARD 0.1 / Q_MAX  0.16478 / Loss  0.0112500730902
loop took 0.2947874069213867 seconds
TIMESTEP 3296 / STATE expl

TIMESTEP 3341 / STATE explore / EPSILON 0.09990542800001825 / ACTION 0 / REWARD -1 / Q_MAX  0.399595 / Loss  0.0665014311671
loop took 0.28375768661499023 seconds
TIMESTEP 3342 / STATE explore / EPSILON 0.09990539470001826 / ACTION 0 / REWARD 0.1 / Q_MAX  0.668851 / Loss  0.0280021578074
loop took 0.2833993434906006 seconds
TIMESTEP 3343 / STATE explore / EPSILON 0.09990536140001827 / ACTION 0 / REWARD 0.1 / Q_MAX  1.13327 / Loss  0.0276770796627
loop took 0.29865360260009766 seconds
TIMESTEP 3344 / STATE explore / EPSILON 0.09990532810001827 / ACTION 0 / REWARD 0.1 / Q_MAX  0.197121 / Loss  0.023464826867
loop took 0.2842414379119873 seconds
TIMESTEP 3345 / STATE explore / EPSILON 0.09990529480001828 / ACTION 0 / REWARD 0.1 / Q_MAX  0.527272 / Loss  0.206687197089
loop took 0.2847561836242676 seconds
TIMESTEP 3346 / STATE explore / EPSILON 0.09990526150001829 / ACTION 0 / REWARD 0.1 / Q_MAX  0.561107 / Loss  0.0246268883348
loop took 0.306441068649292 seconds
TIMESTEP 3347 / STATE exp

TIMESTEP 3392 / STATE explore / EPSILON 0.09990372970001858 / ACTION 0 / REWARD 0.1 / Q_MAX  0.160048 / Loss  0.0346230491996
loop took 0.26485180854797363 seconds
TIMESTEP 3393 / STATE explore / EPSILON 0.09990369640001859 / ACTION 0 / REWARD -1 / Q_MAX  0.54819 / Loss  0.0175105296075
loop took 0.26871824264526367 seconds
TIMESTEP 3394 / STATE explore / EPSILON 0.0999036631000186 / ACTION 0 / REWARD 0.1 / Q_MAX  0.15791 / Loss  0.108450517058
loop took 0.2657032012939453 seconds
TIMESTEP 3395 / STATE explore / EPSILON 0.0999036298000186 / ACTION 0 / REWARD 0.1 / Q_MAX  0.284926 / Loss  0.0249021183699
loop took 0.2657053470611572 seconds
TIMESTEP 3396 / STATE explore / EPSILON 0.09990359650001861 / ACTION 0 / REWARD 0.1 / Q_MAX  0.599016 / Loss  0.0148398317397
loop took 0.2667114734649658 seconds
TIMESTEP 3397 / STATE explore / EPSILON 0.09990356320001861 / ACTION 0 / REWARD 0.1 / Q_MAX  2.20068 / Loss  0.0344210378826
loop took 0.27751588821411133 seconds
TIMESTEP 3398 / STATE expl

TIMESTEP 3443 / STATE explore / EPSILON 0.09990203140001891 / ACTION 0 / REWARD -1 / Q_MAX  0.727836 / Loss  0.0751434266567
loop took 0.2897028923034668 seconds
TIMESTEP 3444 / STATE explore / EPSILON 0.09990199810001892 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.442926 / Loss  0.117894873023
loop took 0.28174376487731934 seconds
TIMESTEP 3445 / STATE explore / EPSILON 0.09990196480001892 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0644638 / Loss  0.118786804378
loop took 0.292830228805542 seconds
TIMESTEP 3446 / STATE explore / EPSILON 0.09990193150001893 / ACTION 0 / REWARD 0.1 / Q_MAX  0.59696 / Loss  0.0340124405921
loop took 0.29930710792541504 seconds
TIMESTEP 3447 / STATE explore / EPSILON 0.09990189820001893 / ACTION 0 / REWARD 0.1 / Q_MAX  0.902833 / Loss  0.0347140617669
loop took 0.29087138175964355 seconds
TIMESTEP 3448 / STATE explore / EPSILON 0.09990186490001894 / ACTION 0 / REWARD 0.1 / Q_MAX  0.617535 / Loss  0.0400453358889
loop took 0.28376007080078125 seconds
TIMESTEP 3449 / STAT

TIMESTEP 3494 / STATE explore / EPSILON 0.09990033310001924 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.323739 / Loss  0.0295030735433
loop took 0.28275179862976074 seconds
TIMESTEP 3495 / STATE explore / EPSILON 0.09990029980001924 / ACTION 0 / REWARD 0.1 / Q_MAX  0.498606 / Loss  0.0143495220691
loop took 0.2987792491912842 seconds
TIMESTEP 3496 / STATE explore / EPSILON 0.09990026650001925 / ACTION 0 / REWARD 0.1 / Q_MAX  0.309662 / Loss  0.0356808751822
loop took 0.3012235164642334 seconds
TIMESTEP 3497 / STATE explore / EPSILON 0.09990023320001926 / ACTION 0 / REWARD -1 / Q_MAX  -0.176141 / Loss  0.02571939677
loop took 0.28208279609680176 seconds
TIMESTEP 3498 / STATE explore / EPSILON 0.09990019990001926 / ACTION 0 / REWARD 0.1 / Q_MAX  0.55867 / Loss  0.0165489651263
loop took 0.29883313179016113 seconds
TIMESTEP 3499 / STATE explore / EPSILON 0.09990016660001927 / ACTION 0 / REWARD 0.1 / Q_MAX  0.27418 / Loss  0.068200699985
loop took 0.28475213050842285 seconds
TIMESTEP 3500 / STATE 

TIMESTEP 3545 / STATE explore / EPSILON 0.09989863480001956 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.484627 / Loss  0.0226732194424
loop took 0.2877473831176758 seconds
TIMESTEP 3546 / STATE explore / EPSILON 0.09989860150001957 / ACTION 0 / REWARD 0.1 / Q_MAX  0.202785 / Loss  0.0349148288369
loop took 0.3078286647796631 seconds
TIMESTEP 3547 / STATE explore / EPSILON 0.09989856820001958 / ACTION 0 / REWARD 0.1 / Q_MAX  0.370488 / Loss  0.0345129668713
loop took 0.29523777961730957 seconds
TIMESTEP 3548 / STATE explore / EPSILON 0.09989853490001958 / ACTION 0 / REWARD -1 / Q_MAX  0.450973 / Loss  0.0160686839372
loop took 0.2897610664367676 seconds
TIMESTEP 3549 / STATE explore / EPSILON 0.09989850160001959 / ACTION 0 / REWARD 0.1 / Q_MAX  0.219519 / Loss  0.0133482404053
loop took 0.3099021911621094 seconds
TIMESTEP 3550 / STATE explore / EPSILON 0.0998984683000196 / ACTION 0 / REWARD 0.1 / Q_MAX  0.264408 / Loss  0.0292202401906
----------Random Action----------
loop took 0.2737357616424

TIMESTEP 3596 / STATE explore / EPSILON 0.09989693650001989 / ACTION 0 / REWARD -1 / Q_MAX  0.45346 / Loss  0.0204693004489
loop took 0.30054545402526855 seconds
TIMESTEP 3597 / STATE explore / EPSILON 0.0998969032000199 / ACTION 0 / REWARD 0.1 / Q_MAX  0.740026 / Loss  0.0186068750918
loop took 0.2873356342315674 seconds
TIMESTEP 3598 / STATE explore / EPSILON 0.0998968699000199 / ACTION 0 / REWARD 0.1 / Q_MAX  0.605282 / Loss  0.0255014132708
loop took 0.2837545871734619 seconds
TIMESTEP 3599 / STATE explore / EPSILON 0.09989683660001991 / ACTION 0 / REWARD 0.1 / Q_MAX  0.685367 / Loss  0.0138329034671
loop took 0.28375983238220215 seconds
TIMESTEP 3600 / STATE explore / EPSILON 0.09989680330001992 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.00699 / Loss  0.0287632085383
loop took 0.2980766296386719 seconds
TIMESTEP 3601 / STATE explore / EPSILON 0.09989677000001992 / ACTION 0 / REWARD 0.1 / Q_MAX  0.389895 / Loss  0.0452778190374
loop took 0.3007950782775879 seconds
TIMESTEP 3602 / STATE ex

TIMESTEP 3647 / STATE explore / EPSILON 0.09989523820002022 / ACTION 0 / REWARD 0.1 / Q_MAX  0.17351 / Loss  0.0257690642029
loop took 0.35245633125305176 seconds
TIMESTEP 3648 / STATE explore / EPSILON 0.09989520490002023 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.572777 / Loss  0.00618576398119
loop took 0.3157782554626465 seconds
TIMESTEP 3649 / STATE explore / EPSILON 0.09989517160002023 / ACTION 0 / REWARD 0.1 / Q_MAX  0.50635 / Loss  0.0364041626453
loop took 0.29679298400878906 seconds
TIMESTEP 3650 / STATE explore / EPSILON 0.09989513830002024 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.262971 / Loss  0.0566455423832
loop took 0.337907075881958 seconds
TIMESTEP 3651 / STATE explore / EPSILON 0.09989510500002025 / ACTION 0 / REWARD 0.1 / Q_MAX  0.224138 / Loss  0.0354964099824
loop took 0.33072566986083984 seconds
TIMESTEP 3652 / STATE explore / EPSILON 0.09989507170002025 / ACTION 0 / REWARD 0.1 / Q_MAX  0.195903 / Loss  0.0296603832394
loop took 0.29083943367004395 seconds
TIMESTEP 3653 / ST

TIMESTEP 3698 / STATE explore / EPSILON 0.09989353990002055 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.369749 / Loss  0.0173569191247
loop took 0.3264744281768799 seconds
TIMESTEP 3699 / STATE explore / EPSILON 0.09989350660002055 / ACTION 0 / REWARD -1 / Q_MAX  0.334865 / Loss  0.0698135495186
loop took 0.3068115711212158 seconds
TIMESTEP 3700 / STATE explore / EPSILON 0.09989347330002056 / ACTION 0 / REWARD 0.1 / Q_MAX  0.649199 / Loss  0.0493199788034
loop took 0.30861449241638184 seconds
TIMESTEP 3701 / STATE explore / EPSILON 0.09989344000002057 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0476845 / Loss  0.014366844669
loop took 0.29077935218811035 seconds
TIMESTEP 3702 / STATE explore / EPSILON 0.09989340670002057 / ACTION 0 / REWARD 0.1 / Q_MAX  0.507653 / Loss  0.0172985121608
loop took 0.3196690082550049 seconds
TIMESTEP 3703 / STATE explore / EPSILON 0.09989337340002058 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0459564 / Loss  0.0317519567907
loop took 0.31688404083251953 seconds
TIMESTEP 3704 / 

TIMESTEP 3749 / STATE explore / EPSILON 0.09989184160002088 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.872354 / Loss  0.0496588014066
loop took 0.3179020881652832 seconds
TIMESTEP 3750 / STATE explore / EPSILON 0.09989180830002088 / ACTION 0 / REWARD 0.1 / Q_MAX  0.324035 / Loss  0.0223363265395
----------Random Action----------
loop took 1.0456302165985107 seconds
TIMESTEP 3751 / STATE explore / EPSILON 0.09989177500002089 / ACTION 1 / REWARD 0.1 / Q_MAX  0.847875 / Loss  0.0151045294479
loop took 0.2675774097442627 seconds
TIMESTEP 3752 / STATE explore / EPSILON 0.0998917417000209 / ACTION 0 / REWARD 0.1 / Q_MAX  0.191303 / Loss  0.0345581546426
loop took 0.29279041290283203 seconds
TIMESTEP 3753 / STATE explore / EPSILON 0.0998917084000209 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.943727 / Loss  0.0251156520098
loop took 0.2829108238220215 seconds
TIMESTEP 3754 / STATE explore / EPSILON 0.09989167510002091 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.109523 / Loss  0.0208103079349
loop took 0.27174305915

TIMESTEP 3799 / STATE explore / EPSILON 0.0998901766000212 / ACTION 0 / REWARD 0.1 / Q_MAX  0.691062 / Loss  0.0181819815189
loop took 0.30000758171081543 seconds
TIMESTEP 3800 / STATE explore / EPSILON 0.0998901433000212 / ACTION 0 / REWARD 0.1 / Q_MAX  1.48881 / Loss  0.0177947562188
loop took 0.29980897903442383 seconds
TIMESTEP 3801 / STATE explore / EPSILON 0.09989011000002121 / ACTION 0 / REWARD 0.1 / Q_MAX  0.338351 / Loss  0.0294296555221
loop took 0.30087995529174805 seconds
TIMESTEP 3802 / STATE explore / EPSILON 0.09989007670002122 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.642592 / Loss  0.00736682163551
loop took 0.31776857376098633 seconds
TIMESTEP 3803 / STATE explore / EPSILON 0.09989004340002122 / ACTION 0 / REWARD 0.1 / Q_MAX  0.470156 / Loss  0.030035879463
loop took 0.314863920211792 seconds
TIMESTEP 3804 / STATE explore / EPSILON 0.09989001010002123 / ACTION 0 / REWARD 0.1 / Q_MAX  0.73122 / Loss  0.0247741565108
loop took 0.29782795906066895 seconds
TIMESTEP 3805 / STATE

TIMESTEP 3850 / STATE explore / EPSILON 0.09988847830002152 / ACTION 0 / REWARD -1 / Q_MAX  0.239562 / Loss  0.0359601154923
loop took 0.3138272762298584 seconds
TIMESTEP 3851 / STATE explore / EPSILON 0.09988844500002153 / ACTION 0 / REWARD 0.1 / Q_MAX  0.651019 / Loss  0.0317898355424
loop took 0.3036630153656006 seconds
TIMESTEP 3852 / STATE explore / EPSILON 0.09988841170002154 / ACTION 0 / REWARD 0.1 / Q_MAX  0.6402 / Loss  0.0292054284364
loop took 0.28418779373168945 seconds
TIMESTEP 3853 / STATE explore / EPSILON 0.09988837840002154 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.232962 / Loss  0.0299292765558
loop took 0.2847626209259033 seconds
TIMESTEP 3854 / STATE explore / EPSILON 0.09988834510002155 / ACTION 0 / REWARD 0.1 / Q_MAX  0.516704 / Loss  0.0292367823422
loop took 0.2827460765838623 seconds
TIMESTEP 3855 / STATE explore / EPSILON 0.09988831180002156 / ACTION 0 / REWARD 0.1 / Q_MAX  0.838144 / Loss  0.0626934021711
loop took 0.2827601432800293 seconds
TIMESTEP 3856 / STATE e

TIMESTEP 3901 / STATE explore / EPSILON 0.09988678000002185 / ACTION 0 / REWARD 0.1 / Q_MAX  0.425877 / Loss  0.0418055132031
loop took 0.28375864028930664 seconds
TIMESTEP 3902 / STATE explore / EPSILON 0.09988674670002186 / ACTION 0 / REWARD 0.1 / Q_MAX  0.441866 / Loss  0.0242153815925
loop took 0.30892252922058105 seconds
TIMESTEP 3903 / STATE explore / EPSILON 0.09988671340002186 / ACTION 0 / REWARD 0.1 / Q_MAX  0.783001 / Loss  0.0387693047523
loop took 0.30004072189331055 seconds
TIMESTEP 3904 / STATE explore / EPSILON 0.09988668010002187 / ACTION 0 / REWARD -1 / Q_MAX  0.45945 / Loss  0.0389730483294
loop took 0.3218533992767334 seconds
TIMESTEP 3905 / STATE explore / EPSILON 0.09988664680002188 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.191618 / Loss  0.0419410988688
loop took 0.2993950843811035 seconds
TIMESTEP 3906 / STATE explore / EPSILON 0.09988661350002188 / ACTION 0 / REWARD 0.1 / Q_MAX  2.78064 / Loss  0.0443180501461
loop took 0.294370174407959 seconds
TIMESTEP 3907 / STATE 

TIMESTEP 3952 / STATE explore / EPSILON 0.09988508170002218 / ACTION 0 / REWARD -1 / Q_MAX  0.720927 / Loss  0.0239857602865
loop took 0.29120564460754395 seconds
TIMESTEP 3953 / STATE explore / EPSILON 0.09988504840002219 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.182959 / Loss  0.0639616400003
loop took 0.2907984256744385 seconds
TIMESTEP 3954 / STATE explore / EPSILON 0.09988501510002219 / ACTION 0 / REWARD 0.1 / Q_MAX  1.25694 / Loss  0.0396384075284
loop took 0.2967362403869629 seconds
TIMESTEP 3955 / STATE explore / EPSILON 0.0998849818000222 / ACTION 0 / REWARD 0.1 / Q_MAX  0.404339 / Loss  0.035783033818
loop took 0.30791592597961426 seconds
TIMESTEP 3956 / STATE explore / EPSILON 0.0998849485000222 / ACTION 0 / REWARD 0.1 / Q_MAX  0.561824 / Loss  0.0337743163109
loop took 0.3137080669403076 seconds
TIMESTEP 3957 / STATE explore / EPSILON 0.09988491520002221 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0655484 / Loss  0.0303263906389
loop took 0.3093113899230957 seconds
TIMESTEP 3958 / STATE 

TIMESTEP 4002 / STATE explore / EPSILON 0.0998834167000225 / ACTION 0 / REWARD 0.1 / Q_MAX  0.37014 / Loss  0.0567737780511
loop took 0.29950928688049316 seconds
TIMESTEP 4003 / STATE explore / EPSILON 0.09988338340002251 / ACTION 0 / REWARD -1 / Q_MAX  0.14776 / Loss  0.0251723974943
loop took 0.3168802261352539 seconds
TIMESTEP 4004 / STATE explore / EPSILON 0.09988335010002251 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0962584 / Loss  0.0202806740999
loop took 0.30379796028137207 seconds
TIMESTEP 4005 / STATE explore / EPSILON 0.09988331680002252 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.18995 / Loss  0.0334787033498
loop took 0.40006399154663086 seconds
TIMESTEP 4006 / STATE explore / EPSILON 0.09988328350002253 / ACTION 0 / REWARD 0.1 / Q_MAX  0.463016 / Loss  0.0276652164757
loop took 0.3509335517883301 seconds
TIMESTEP 4007 / STATE explore / EPSILON 0.09988325020002253 / ACTION 0 / REWARD 0.1 / Q_MAX  0.125149 / Loss  0.032613158226
loop took 0.3847696781158447 seconds
TIMESTEP 4008 / STATE e

TIMESTEP 4053 / STATE explore / EPSILON 0.09988171840002283 / ACTION 0 / REWARD 0.1 / Q_MAX  0.223362 / Loss  0.0226669721305
loop took 0.31082987785339355 seconds
TIMESTEP 4054 / STATE explore / EPSILON 0.09988168510002284 / ACTION 0 / REWARD 0.1 / Q_MAX  0.139242 / Loss  0.303271412849
loop took 0.31893110275268555 seconds
TIMESTEP 4055 / STATE explore / EPSILON 0.09988165180002284 / ACTION 0 / REWARD 0.1 / Q_MAX  0.232139 / Loss  0.0132567957044
loop took 0.3016197681427002 seconds
TIMESTEP 4056 / STATE explore / EPSILON 0.09988161850002285 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0729156 / Loss  0.027042362839
loop took 0.3007991313934326 seconds
TIMESTEP 4057 / STATE explore / EPSILON 0.09988158520002285 / ACTION 0 / REWARD 0.1 / Q_MAX  0.288255 / Loss  0.0341972187161
loop took 0.3018062114715576 seconds
TIMESTEP 4058 / STATE explore / EPSILON 0.09988155190002286 / ACTION 0 / REWARD 0.1 / Q_MAX  0.431691 / Loss  0.0155386319384
loop took 0.2975132465362549 seconds
TIMESTEP 4059 / STATE

TIMESTEP 4104 / STATE explore / EPSILON 0.09988002010002316 / ACTION 0 / REWARD 0.1 / Q_MAX  0.687761 / Loss  0.0214361660182
loop took 0.2998499870300293 seconds
TIMESTEP 4105 / STATE explore / EPSILON 0.09987998680002316 / ACTION 0 / REWARD 0.1 / Q_MAX  0.474311 / Loss  0.0181562248617
loop took 0.28375840187072754 seconds
TIMESTEP 4106 / STATE explore / EPSILON 0.09987995350002317 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.322096 / Loss  0.0258263200521
loop took 0.28575992584228516 seconds
TIMESTEP 4107 / STATE explore / EPSILON 0.09987992020002318 / ACTION 0 / REWARD 0.1 / Q_MAX  0.301072 / Loss  0.0115898484364
loop took 0.31125569343566895 seconds
TIMESTEP 4108 / STATE explore / EPSILON 0.09987988690002318 / ACTION 0 / REWARD 0.1 / Q_MAX  0.299057 / Loss  0.00906205177307
loop took 0.2846035957336426 seconds
TIMESTEP 4109 / STATE explore / EPSILON 0.09987985360002319 / ACTION 0 / REWARD -1 / Q_MAX  0.30132 / Loss  0.0096797272563
loop took 0.31016039848327637 seconds
TIMESTEP 4110 / ST

TIMESTEP 4155 / STATE explore / EPSILON 0.09987832180002348 / ACTION 0 / REWARD 0.1 / Q_MAX  0.242405 / Loss  0.103195562959
loop took 0.2978475093841553 seconds
TIMESTEP 4156 / STATE explore / EPSILON 0.09987828850002349 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.219412 / Loss  0.0640248805285
loop took 0.310438871383667 seconds
TIMESTEP 4157 / STATE explore / EPSILON 0.0998782552000235 / ACTION 0 / REWARD 0.1 / Q_MAX  0.154225 / Loss  0.0124261062592
loop took 0.29114270210266113 seconds
TIMESTEP 4158 / STATE explore / EPSILON 0.0998782219000235 / ACTION 0 / REWARD -1 / Q_MAX  -0.583478 / Loss  0.0357070416212
loop took 0.3081512451171875 seconds
TIMESTEP 4159 / STATE explore / EPSILON 0.09987818860002351 / ACTION 0 / REWARD 0.1 / Q_MAX  0.147436 / Loss  0.0349322892725
loop took 0.29077649116516113 seconds
TIMESTEP 4160 / STATE explore / EPSILON 0.09987815530002352 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.003003 / Loss  0.0321407876909
loop took 0.2837526798248291 seconds
TIMESTEP 4161 / STATE 

TIMESTEP 4206 / STATE explore / EPSILON 0.09987662350002381 / ACTION 0 / REWARD 0.1 / Q_MAX  0.610673 / Loss  0.12027759105
loop took 0.2878262996673584 seconds
TIMESTEP 4207 / STATE explore / EPSILON 0.09987659020002382 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.11365 / Loss  0.0302861109376
loop took 0.2925899028778076 seconds
TIMESTEP 4208 / STATE explore / EPSILON 0.09987655690002382 / ACTION 0 / REWARD 0.1 / Q_MAX  0.833101 / Loss  0.0263703446835
loop took 0.2578439712524414 seconds
TIMESTEP 4209 / STATE explore / EPSILON 0.09987652360002383 / ACTION 0 / REWARD -1 / Q_MAX  1.261 / Loss  0.0434215292335
loop took 0.2987945079803467 seconds
TIMESTEP 4210 / STATE explore / EPSILON 0.09987649030002384 / ACTION 0 / REWARD 0.1 / Q_MAX  0.649899 / Loss  0.037980414927
loop took 0.29984068870544434 seconds
TIMESTEP 4211 / STATE explore / EPSILON 0.09987645700002384 / ACTION 0 / REWARD 0.1 / Q_MAX  0.59384 / Loss  0.0248134844005
loop took 0.3011739253997803 seconds
TIMESTEP 4212 / STATE explore

TIMESTEP 4257 / STATE explore / EPSILON 0.09987492520002414 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0424063 / Loss  0.0498095341027
loop took 0.2907736301422119 seconds
TIMESTEP 4258 / STATE explore / EPSILON 0.09987489190002415 / ACTION 0 / REWARD -1 / Q_MAX  0.612976 / Loss  0.0214524418116
loop took 0.28275442123413086 seconds
TIMESTEP 4259 / STATE explore / EPSILON 0.09987485860002415 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0551898 / Loss  0.0297664776444
loop took 0.3128666877746582 seconds
TIMESTEP 4260 / STATE explore / EPSILON 0.09987482530002416 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0639035 / Loss  0.0438164956868
loop took 0.3154935836791992 seconds
TIMESTEP 4261 / STATE explore / EPSILON 0.09987479200002417 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.09734 / Loss  0.0391554571688
loop took 0.30445265769958496 seconds
TIMESTEP 4262 / STATE explore / EPSILON 0.09987475870002417 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0176664 / Loss  0.0149668743834
loop took 0.2967855930328369 seconds
TIMESTEP 4263 /

TIMESTEP 4307 / STATE explore / EPSILON 0.09987326020002446 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.168062 / Loss  0.0336808487773
loop took 0.2837533950805664 seconds
TIMESTEP 4308 / STATE explore / EPSILON 0.09987322690002447 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.311205 / Loss  0.0290854275227
loop took 0.29979705810546875 seconds
TIMESTEP 4309 / STATE explore / EPSILON 0.09987319360002447 / ACTION 0 / REWARD 0.1 / Q_MAX  0.391618 / Loss  0.01466956269
loop took 0.29861879348754883 seconds
TIMESTEP 4310 / STATE explore / EPSILON 0.09987316030002448 / ACTION 0 / REWARD 0.1 / Q_MAX  0.601654 / Loss  0.0240677073598
loop took 0.29967284202575684 seconds
TIMESTEP 4311 / STATE explore / EPSILON 0.09987312700002449 / ACTION 0 / REWARD 0.1 / Q_MAX  0.45087 / Loss  0.0203789863735
loop took 0.2847623825073242 seconds
TIMESTEP 4312 / STATE explore / EPSILON 0.09987309370002449 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.127563 / Loss  0.0373148657382
loop took 0.3088195323944092 seconds
TIMESTEP 4313 / STA

TIMESTEP 4358 / STATE explore / EPSILON 0.09987156190002479 / ACTION 0 / REWARD 0.1 / Q_MAX  0.433143 / Loss  0.0544274374843
loop took 0.2642557621002197 seconds
TIMESTEP 4359 / STATE explore / EPSILON 0.0998715286000248 / ACTION 0 / REWARD -1 / Q_MAX  0.201477 / Loss  0.00467260926962
loop took 0.294727087020874 seconds
TIMESTEP 4360 / STATE explore / EPSILON 0.0998714953000248 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.493 / Loss  0.0094152810052
loop took 0.31398701667785645 seconds
TIMESTEP 4361 / STATE explore / EPSILON 0.09987146200002481 / ACTION 0 / REWARD 0.1 / Q_MAX  0.16371 / Loss  0.0264090374112
loop took 0.30080366134643555 seconds
TIMESTEP 4362 / STATE explore / EPSILON 0.09987142870002481 / ACTION 0 / REWARD 0.1 / Q_MAX  0.162908 / Loss  0.0331511907279
loop took 0.31610989570617676 seconds
TIMESTEP 4363 / STATE explore / EPSILON 0.09987139540002482 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.576003 / Loss  0.0496655143797
loop took 0.29851341247558594 seconds
TIMESTEP 4364 / STATE e

TIMESTEP 4409 / STATE explore / EPSILON 0.09986986360002512 / ACTION 0 / REWARD -1 / Q_MAX  0.239774 / Loss  0.0113854240626
loop took 0.2937643527984619 seconds
TIMESTEP 4410 / STATE explore / EPSILON 0.09986983030002512 / ACTION 0 / REWARD 0.1 / Q_MAX  0.353886 / Loss  0.0318348631263
loop took 0.2847568988800049 seconds
TIMESTEP 4411 / STATE explore / EPSILON 0.09986979700002513 / ACTION 0 / REWARD 0.1 / Q_MAX  1.02992 / Loss  0.0108978673816
loop took 0.3569495677947998 seconds
TIMESTEP 4412 / STATE explore / EPSILON 0.09986976370002514 / ACTION 0 / REWARD 0.1 / Q_MAX  0.196435 / Loss  0.0163993854076
loop took 0.3368990421295166 seconds
TIMESTEP 4413 / STATE explore / EPSILON 0.09986973040002514 / ACTION 0 / REWARD 0.1 / Q_MAX  0.173954 / Loss  0.00864648632705
loop took 0.31562161445617676 seconds
TIMESTEP 4414 / STATE explore / EPSILON 0.09986969710002515 / ACTION 0 / REWARD 0.1 / Q_MAX  0.158624 / Loss  0.0275706499815
loop took 0.3513662815093994 seconds
TIMESTEP 4415 / STATE 

TIMESTEP 4460 / STATE explore / EPSILON 0.09986816530002544 / ACTION 0 / REWARD 0.1 / Q_MAX  0.256828 / Loss  0.212433740497
loop took 0.30060362815856934 seconds
TIMESTEP 4461 / STATE explore / EPSILON 0.09986813200002545 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292935 / Loss  0.0327198803425
loop took 0.30181241035461426 seconds
TIMESTEP 4462 / STATE explore / EPSILON 0.09986809870002546 / ACTION 0 / REWARD 0.1 / Q_MAX  0.476678 / Loss  0.0295413658023
loop took 0.29828572273254395 seconds
TIMESTEP 4463 / STATE explore / EPSILON 0.09986806540002546 / ACTION 0 / REWARD 0.1 / Q_MAX  0.200324 / Loss  0.0253125242889
loop took 0.277285099029541 seconds
TIMESTEP 4464 / STATE explore / EPSILON 0.09986803210002547 / ACTION 0 / REWARD -1 / Q_MAX  1.09826 / Loss  0.0179597046226
loop took 0.2877376079559326 seconds
TIMESTEP 4465 / STATE explore / EPSILON 0.09986799880002548 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292288 / Loss  0.0171847306192
loop took 0.28275275230407715 seconds
TIMESTEP 4466 / STATE 

TIMESTEP 4510 / STATE explore / EPSILON 0.09986650030002577 / ACTION 0 / REWARD 0.1 / Q_MAX  0.917164 / Loss  0.0323518514633
loop took 0.290722131729126 seconds
TIMESTEP 4511 / STATE explore / EPSILON 0.09986646700002577 / ACTION 0 / REWARD 0.1 / Q_MAX  0.837426 / Loss  0.0257552340627
loop took 0.31684207916259766 seconds
TIMESTEP 4512 / STATE explore / EPSILON 0.09986643370002578 / ACTION 0 / REWARD 0.1 / Q_MAX  0.617888 / Loss  0.0248866844922
loop took 0.29760193824768066 seconds
TIMESTEP 4513 / STATE explore / EPSILON 0.09986640040002578 / ACTION 0 / REWARD 0.1 / Q_MAX  0.600505 / Loss  0.0495858564973
loop took 0.31684446334838867 seconds
TIMESTEP 4514 / STATE explore / EPSILON 0.09986636710002579 / ACTION 0 / REWARD 0.1 / Q_MAX  0.339513 / Loss  0.0355009771883
loop took 0.29337286949157715 seconds
TIMESTEP 4515 / STATE explore / EPSILON 0.0998663338000258 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.355064 / Loss  0.0194271467626
loop took 0.29573941230773926 seconds
TIMESTEP 4516 / ST

TIMESTEP 4561 / STATE explore / EPSILON 0.09986480200002609 / ACTION 0 / REWARD 0.1 / Q_MAX  0.164198 / Loss  0.0331550203264
loop took 0.307830810546875 seconds
TIMESTEP 4562 / STATE explore / EPSILON 0.0998647687000261 / ACTION 0 / REWARD 0.1 / Q_MAX  0.179771 / Loss  0.0173757225275
loop took 0.2818641662597656 seconds
TIMESTEP 4563 / STATE explore / EPSILON 0.0998647354000261 / ACTION 0 / REWARD -1 / Q_MAX  0.211291 / Loss  0.0127984695137
loop took 0.28376078605651855 seconds
TIMESTEP 4564 / STATE explore / EPSILON 0.09986470210002611 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.288674 / Loss  0.0367739088833
loop took 0.3106091022491455 seconds
TIMESTEP 4565 / STATE explore / EPSILON 0.09986466880002612 / ACTION 0 / REWARD 0.1 / Q_MAX  0.622494 / Loss  0.0220325179398
loop took 0.3207590579986572 seconds
TIMESTEP 4566 / STATE explore / EPSILON 0.09986463550002612 / ACTION 0 / REWARD 0.1 / Q_MAX  0.17556 / Loss  0.0498119592667
loop took 0.2947962284088135 seconds
TIMESTEP 4567 / STATE exp

TIMESTEP 4612 / STATE explore / EPSILON 0.09986310370002642 / ACTION 0 / REWARD 0.1 / Q_MAX  0.160961 / Loss  0.213677868247
loop took 0.28074002265930176 seconds
TIMESTEP 4613 / STATE explore / EPSILON 0.09986307040002643 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.148274 / Loss  0.0103313056752
loop took 0.26871228218078613 seconds
TIMESTEP 4614 / STATE explore / EPSILON 0.09986303710002643 / ACTION 0 / REWARD 0.1 / Q_MAX  0.158097 / Loss  0.0348532982171
loop took 0.2657027244567871 seconds
TIMESTEP 4615 / STATE explore / EPSILON 0.09986300380002644 / ACTION 0 / REWARD 0.1 / Q_MAX  0.170374 / Loss  0.0239561311901
loop took 0.26571059226989746 seconds
TIMESTEP 4616 / STATE explore / EPSILON 0.09986297050002645 / ACTION 0 / REWARD 0.1 / Q_MAX  0.197516 / Loss  0.215849041939
loop took 0.3037912845611572 seconds
TIMESTEP 4617 / STATE explore / EPSILON 0.09986293720002645 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0134856 / Loss  0.0229885987937
loop took 0.3196110725402832 seconds
TIMESTEP 4618 / STA

TIMESTEP 4663 / STATE explore / EPSILON 0.09986140540002675 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.182206 / Loss  0.0380952358246
loop took 0.28576016426086426 seconds
TIMESTEP 4664 / STATE explore / EPSILON 0.09986137210002675 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.665855 / Loss  0.0484477877617
loop took 0.30080318450927734 seconds
TIMESTEP 4665 / STATE explore / EPSILON 0.09986133880002676 / ACTION 0 / REWARD 0.1 / Q_MAX  0.443545 / Loss  0.016306694597
loop took 0.30784034729003906 seconds
TIMESTEP 4666 / STATE explore / EPSILON 0.09986130550002677 / ACTION 0 / REWARD 0.1 / Q_MAX  0.340521 / Loss  0.0186235774308
loop took 0.3168978691101074 seconds
TIMESTEP 4667 / STATE explore / EPSILON 0.09986127220002677 / ACTION 0 / REWARD -1 / Q_MAX  0.343872 / Loss  0.00744422245771
loop took 0.30423545837402344 seconds
TIMESTEP 4668 / STATE explore / EPSILON 0.09986123890002678 / ACTION 0 / REWARD 0.1 / Q_MAX  0.371388 / Loss  0.0202900562435
loop took 0.299938440322876 seconds
TIMESTEP 4669 / ST

TIMESTEP 4714 / STATE explore / EPSILON 0.09985970710002708 / ACTION 0 / REWARD 0.1 / Q_MAX  0.656988 / Loss  0.0267548523843
loop took 0.29981017112731934 seconds
TIMESTEP 4715 / STATE explore / EPSILON 0.09985967380002708 / ACTION 0 / REWARD 0.1 / Q_MAX  0.695502 / Loss  0.0719514265656
loop took 0.3168461322784424 seconds
TIMESTEP 4716 / STATE explore / EPSILON 0.09985964050002709 / ACTION 0 / REWARD 0.1 / Q_MAX  1.22636 / Loss  0.0482421889901
loop took 0.3082249164581299 seconds
TIMESTEP 4717 / STATE explore / EPSILON 0.0998596072000271 / ACTION 0 / REWARD -1 / Q_MAX  -0.701242 / Loss  0.0333053767681
loop took 0.3008003234863281 seconds
TIMESTEP 4718 / STATE explore / EPSILON 0.0998595739000271 / ACTION 0 / REWARD 0.1 / Q_MAX  0.624629 / Loss  0.0376831591129
loop took 0.2999551296234131 seconds
TIMESTEP 4719 / STATE explore / EPSILON 0.09985954060002711 / ACTION 0 / REWARD 0.1 / Q_MAX  0.152289 / Loss  0.0731995552778
loop took 0.3148355484008789 seconds
TIMESTEP 4720 / STATE ex

TIMESTEP 4765 / STATE explore / EPSILON 0.0998580088000274 / ACTION 0 / REWARD 0.1 / Q_MAX  0.095899 / Loss  0.0544219240546
loop took 0.3005645275115967 seconds
TIMESTEP 4766 / STATE explore / EPSILON 0.09985797550002741 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0527552 / Loss  0.00776554876938
loop took 0.3147881031036377 seconds
TIMESTEP 4767 / STATE explore / EPSILON 0.09985794220002742 / ACTION 0 / REWARD 0.1 / Q_MAX  0.206415 / Loss  0.0257508829236
loop took 0.2867577075958252 seconds
TIMESTEP 4768 / STATE explore / EPSILON 0.09985790890002742 / ACTION 0 / REWARD 0.1 / Q_MAX  0.355422 / Loss  0.0144430650398
loop took 0.2857666015625 seconds
TIMESTEP 4769 / STATE explore / EPSILON 0.09985787560002743 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0121719 / Loss  0.0115250637755
loop took 0.283374547958374 seconds
TIMESTEP 4770 / STATE explore / EPSILON 0.09985784230002744 / ACTION 0 / REWARD -1 / Q_MAX  0.36964 / Loss  0.0265514440835
loop took 0.29926228523254395 seconds
TIMESTEP 4771 / STATE ex

TIMESTEP 4816 / STATE explore / EPSILON 0.09985631050002773 / ACTION 0 / REWARD 0.1 / Q_MAX  0.613426 / Loss  0.021168012172
loop took 0.30985212326049805 seconds
TIMESTEP 4817 / STATE explore / EPSILON 0.09985627720002774 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0762215 / Loss  0.0840292945504
loop took 0.28911805152893066 seconds
TIMESTEP 4818 / STATE explore / EPSILON 0.09985624390002774 / ACTION 0 / REWARD 0.1 / Q_MAX  0.260857 / Loss  0.0366892144084
loop took 0.28499865531921387 seconds
TIMESTEP 4819 / STATE explore / EPSILON 0.09985621060002775 / ACTION 0 / REWARD -1 / Q_MAX  0.667927 / Loss  0.0243068449199
loop took 0.3078489303588867 seconds
TIMESTEP 4820 / STATE explore / EPSILON 0.09985617730002776 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0157542 / Loss  0.0214383173734
loop took 0.3158400058746338 seconds
TIMESTEP 4821 / STATE explore / EPSILON 0.09985614400002776 / ACTION 0 / REWARD 0.1 / Q_MAX  0.575163 / Loss  0.0195550695062
loop took 0.2917764186859131 seconds
TIMESTEP 4822 / S

TIMESTEP 4867 / STATE explore / EPSILON 0.09985461220002806 / ACTION 0 / REWARD 0.1 / Q_MAX  0.632612 / Loss  0.0449678599834
loop took 0.2975618839263916 seconds
TIMESTEP 4868 / STATE explore / EPSILON 0.09985457890002807 / ACTION 0 / REWARD 0.1 / Q_MAX  0.26558 / Loss  0.0173862278461
loop took 0.3068735599517822 seconds
TIMESTEP 4869 / STATE explore / EPSILON 0.09985454560002807 / ACTION 0 / REWARD 0.1 / Q_MAX  0.21064 / Loss  0.0215557999909
loop took 0.291766881942749 seconds
TIMESTEP 4870 / STATE explore / EPSILON 0.09985451230002808 / ACTION 0 / REWARD 0.1 / Q_MAX  0.283989 / Loss  0.0341132953763
loop took 0.30836963653564453 seconds
TIMESTEP 4871 / STATE explore / EPSILON 0.09985447900002808 / ACTION 0 / REWARD 0.1 / Q_MAX  0.89175 / Loss  0.0380192995071
loop took 0.3198397159576416 seconds
TIMESTEP 4872 / STATE explore / EPSILON 0.09985444570002809 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0178531 / Loss  0.011216240935
loop took 0.2867600917816162 seconds
TIMESTEP 4873 / STATE ex

TIMESTEP 4918 / STATE explore / EPSILON 0.09985291390002839 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.547888 / Loss  0.0177069120109
loop took 0.29979729652404785 seconds
TIMESTEP 4919 / STATE explore / EPSILON 0.09985288060002839 / ACTION 0 / REWARD 0.1 / Q_MAX  0.356156 / Loss  0.0391955971718
loop took 0.2837543487548828 seconds
TIMESTEP 4920 / STATE explore / EPSILON 0.0998528473000284 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0462531 / Loss  0.0289063770324
loop took 0.3188514709472656 seconds
TIMESTEP 4921 / STATE explore / EPSILON 0.0998528140000284 / ACTION 0 / REWARD 0.1 / Q_MAX  0.249409 / Loss  0.0293952189386
loop took 0.295015811920166 seconds
TIMESTEP 4922 / STATE explore / EPSILON 0.09985278070002841 / ACTION 0 / REWARD 0.1 / Q_MAX  0.112571 / Loss  0.0164647065103
loop took 0.284512996673584 seconds
TIMESTEP 4923 / STATE explore / EPSILON 0.09985274740002842 / ACTION 0 / REWARD 0.1 / Q_MAX  0.740952 / Loss  0.0253269243985
loop took 0.2877669334411621 seconds
TIMESTEP 4924 / STATE e

TIMESTEP 4969 / STATE explore / EPSILON 0.09985121560002871 / ACTION 0 / REWARD 0.1 / Q_MAX  0.491408 / Loss  0.0223618540913
loop took 0.3128962516784668 seconds
TIMESTEP 4970 / STATE explore / EPSILON 0.09985118230002872 / ACTION 0 / REWARD 0.1 / Q_MAX  0.41019 / Loss  0.0134115405381
loop took 0.3150169849395752 seconds
TIMESTEP 4971 / STATE explore / EPSILON 0.09985114900002873 / ACTION 0 / REWARD 0.1 / Q_MAX  0.360375 / Loss  0.0432521179318
loop took 0.31287074089050293 seconds
TIMESTEP 4972 / STATE explore / EPSILON 0.09985111570002873 / ACTION 0 / REWARD 0.1 / Q_MAX  0.445215 / Loss  0.0138091742992
loop took 0.3134632110595703 seconds
TIMESTEP 4973 / STATE explore / EPSILON 0.09985108240002874 / ACTION 0 / REWARD -1 / Q_MAX  0.571391 / Loss  0.020208414644
loop took 0.293773889541626 seconds
TIMESTEP 4974 / STATE explore / EPSILON 0.09985104910002875 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.15356 / Loss  0.0177536159754
loop took 0.2827460765838623 seconds
TIMESTEP 4975 / STATE exp

TIMESTEP 5020 / STATE explore / EPSILON 0.09984951730002904 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.746564 / Loss  0.0166631378233
loop took 0.3278694152832031 seconds
TIMESTEP 5021 / STATE explore / EPSILON 0.09984948400002905 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.264551 / Loss  0.185451686382
loop took 0.28288912773132324 seconds
TIMESTEP 5022 / STATE explore / EPSILON 0.09984945070002905 / ACTION 0 / REWARD 0.1 / Q_MAX  0.214276 / Loss  0.0104478048161
loop took 0.29378366470336914 seconds
TIMESTEP 5023 / STATE explore / EPSILON 0.09984941740002906 / ACTION 0 / REWARD -1 / Q_MAX  -0.240511 / Loss  0.0168733634055
loop took 0.3048083782196045 seconds
TIMESTEP 5024 / STATE explore / EPSILON 0.09984938410002907 / ACTION 0 / REWARD 0.1 / Q_MAX  0.243874 / Loss  0.00563068548217
loop took 0.2837550640106201 seconds
TIMESTEP 5025 / STATE explore / EPSILON 0.09984935080002907 / ACTION 0 / REWARD 0.1 / Q_MAX  0.525136 / Loss  0.0209254324436
loop took 0.28676557540893555 seconds
TIMESTEP 5026 / S

TIMESTEP 5070 / STATE explore / EPSILON 0.09984785230002936 / ACTION 0 / REWARD 0.1 / Q_MAX  0.506742 / Loss  0.0166601333767
loop took 0.3108513355255127 seconds
TIMESTEP 5071 / STATE explore / EPSILON 0.09984781900002937 / ACTION 0 / REWARD 0.1 / Q_MAX  0.33471 / Loss  0.0313714891672
loop took 0.2987945079803467 seconds
TIMESTEP 5072 / STATE explore / EPSILON 0.09984778570002938 / ACTION 0 / REWARD 0.1 / Q_MAX  0.450284 / Loss  0.0171290040016
loop took 0.29090189933776855 seconds
TIMESTEP 5073 / STATE explore / EPSILON 0.09984775240002938 / ACTION 0 / REWARD 0.1 / Q_MAX  0.481897 / Loss  0.0446440242231
loop took 0.3088192939758301 seconds
TIMESTEP 5074 / STATE explore / EPSILON 0.09984771910002939 / ACTION 0 / REWARD 0.1 / Q_MAX  0.290817 / Loss  0.0563988015056
loop took 0.30182909965515137 seconds
TIMESTEP 5075 / STATE explore / EPSILON 0.0998476858000294 / ACTION 0 / REWARD -1 / Q_MAX  -0.174809 / Loss  0.0391996651888
loop took 0.3159515857696533 seconds
TIMESTEP 5076 / STATE 

TIMESTEP 5121 / STATE explore / EPSILON 0.09984615400002969 / ACTION 0 / REWARD 0.1 / Q_MAX  0.557022 / Loss  0.0540744997561
loop took 0.2827591896057129 seconds
TIMESTEP 5122 / STATE explore / EPSILON 0.0998461207000297 / ACTION 0 / REWARD 0.1 / Q_MAX  0.50454 / Loss  0.0216006673872
loop took 0.32090115547180176 seconds
TIMESTEP 5123 / STATE explore / EPSILON 0.0998460874000297 / ACTION 0 / REWARD 0.1 / Q_MAX  0.581293 / Loss  0.0087550226599
loop took 0.2936983108520508 seconds
TIMESTEP 5124 / STATE explore / EPSILON 0.09984605410002971 / ACTION 0 / REWARD 0.1 / Q_MAX  1.488 / Loss  0.0450697503984
loop took 0.31348633766174316 seconds
TIMESTEP 5125 / STATE explore / EPSILON 0.09984602080002972 / ACTION 0 / REWARD 0.1 / Q_MAX  0.442535 / Loss  0.044205956161
loop took 0.30280399322509766 seconds
TIMESTEP 5126 / STATE explore / EPSILON 0.09984598750002972 / ACTION 0 / REWARD 0.1 / Q_MAX  0.430419 / Loss  0.0133209768683
loop took 0.2936828136444092 seconds
TIMESTEP 5127 / STATE expl

TIMESTEP 5172 / STATE explore / EPSILON 0.09984445570003002 / ACTION 0 / REWARD 0.1 / Q_MAX  0.387066 / Loss  0.0229259990156
loop took 0.3066751956939697 seconds
TIMESTEP 5173 / STATE explore / EPSILON 0.09984442240003003 / ACTION 0 / REWARD 0.1 / Q_MAX  0.3124 / Loss  0.0186722502112
loop took 0.2995131015777588 seconds
TIMESTEP 5174 / STATE explore / EPSILON 0.09984438910003003 / ACTION 0 / REWARD 0.1 / Q_MAX  0.312847 / Loss  0.0320464968681
loop took 0.2837560176849365 seconds
TIMESTEP 5175 / STATE explore / EPSILON 0.09984435580003004 / ACTION 0 / REWARD 0.1 / Q_MAX  0.32323 / Loss  0.0176391489804
loop took 0.284759521484375 seconds
TIMESTEP 5176 / STATE explore / EPSILON 0.09984432250003004 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.906086 / Loss  0.0140258781612
loop took 0.29849815368652344 seconds
TIMESTEP 5177 / STATE explore / EPSILON 0.09984428920003005 / ACTION 0 / REWARD -1 / Q_MAX  0.347536 / Loss  0.0218784529716
loop took 0.2979004383087158 seconds
TIMESTEP 5178 / STATE exp

TIMESTEP 5223 / STATE explore / EPSILON 0.09984275740003035 / ACTION 0 / REWARD 0.1 / Q_MAX  0.443566 / Loss  0.0141853988171
loop took 0.33087992668151855 seconds
TIMESTEP 5224 / STATE explore / EPSILON 0.09984272410003035 / ACTION 0 / REWARD 0.1 / Q_MAX  0.55022 / Loss  0.0167015660554
loop took 0.2927696704864502 seconds
TIMESTEP 5225 / STATE explore / EPSILON 0.09984269080003036 / ACTION 0 / REWARD 0.1 / Q_MAX  1.23661 / Loss  0.0115242209285
loop took 0.30993032455444336 seconds
TIMESTEP 5226 / STATE explore / EPSILON 0.09984265750003037 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.400608 / Loss  0.013212878257
loop took 0.30046749114990234 seconds
TIMESTEP 5227 / STATE explore / EPSILON 0.09984262420003037 / ACTION 0 / REWARD -1 / Q_MAX  0.670153 / Loss  0.0107276635244
loop took 0.3138554096221924 seconds
TIMESTEP 5228 / STATE explore / EPSILON 0.09984259090003038 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.770639 / Loss  0.0205456689
loop took 0.32085442543029785 seconds
TIMESTEP 5229 / STATE e

TIMESTEP 5274 / STATE explore / EPSILON 0.09984105910003067 / ACTION 0 / REWARD 0.1 / Q_MAX  0.611414 / Loss  0.0196644384414
loop took 0.2521798610687256 seconds
TIMESTEP 5275 / STATE explore / EPSILON 0.09984102580003068 / ACTION 0 / REWARD -1 / Q_MAX  0.388424 / Loss  0.0516517981887
loop took 0.26470398902893066 seconds
TIMESTEP 5276 / STATE explore / EPSILON 0.09984099250003069 / ACTION 0 / REWARD 0.1 / Q_MAX  0.393369 / Loss  0.199556559324
loop took 0.28174924850463867 seconds
TIMESTEP 5277 / STATE explore / EPSILON 0.0998409592000307 / ACTION 0 / REWARD 0.1 / Q_MAX  0.390324 / Loss  0.0247247572988
loop took 0.32083964347839355 seconds
TIMESTEP 5278 / STATE explore / EPSILON 0.0998409259000307 / ACTION 0 / REWARD 0.1 / Q_MAX  0.381775 / Loss  0.0295934416354
loop took 0.29679107666015625 seconds
TIMESTEP 5279 / STATE explore / EPSILON 0.0998408926000307 / ACTION 0 / REWARD 0.1 / Q_MAX  0.387227 / Loss  0.0404437035322
loop took 0.30989980697631836 seconds
TIMESTEP 5280 / STATE 

TIMESTEP 5325 / STATE explore / EPSILON 0.099839360800031 / ACTION 0 / REWARD 0.1 / Q_MAX  0.330509 / Loss  0.0374332182109
loop took 0.30916643142700195 seconds
TIMESTEP 5326 / STATE explore / EPSILON 0.09983932750003101 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.32123 / Loss  0.0221028868109
loop took 0.28376126289367676 seconds
TIMESTEP 5327 / STATE explore / EPSILON 0.09983929420003101 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.986544 / Loss  0.0209275297821
loop took 0.3109745979309082 seconds
TIMESTEP 5328 / STATE explore / EPSILON 0.09983926090003102 / ACTION 0 / REWARD 0.1 / Q_MAX  0.37318 / Loss  0.00633711088449
loop took 0.2887585163116455 seconds
TIMESTEP 5329 / STATE explore / EPSILON 0.09983922760003103 / ACTION 0 / REWARD 0.1 / Q_MAX  0.943834 / Loss  0.00631080102175
loop took 0.313340425491333 seconds
TIMESTEP 5330 / STATE explore / EPSILON 0.09983919430003103 / ACTION 0 / REWARD 0.1 / Q_MAX  0.53011 / Loss  0.0172102749348
loop took 0.3018679618835449 seconds
TIMESTEP 5331 / STATE 

TIMESTEP 5376 / STATE explore / EPSILON 0.09983766250003133 / ACTION 0 / REWARD 0.1 / Q_MAX  0.465642 / Loss  0.021043676883
loop took 0.28327131271362305 seconds
TIMESTEP 5377 / STATE explore / EPSILON 0.09983762920003134 / ACTION 0 / REWARD 0.1 / Q_MAX  0.401808 / Loss  0.0279895123094
loop took 0.2837860584259033 seconds
TIMESTEP 5378 / STATE explore / EPSILON 0.09983759590003134 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0175289 / Loss  0.01984542422
loop took 0.29979443550109863 seconds
TIMESTEP 5379 / STATE explore / EPSILON 0.09983756260003135 / ACTION 0 / REWARD 0.1 / Q_MAX  0.457417 / Loss  0.0306994505227
loop took 0.3176298141479492 seconds
TIMESTEP 5380 / STATE explore / EPSILON 0.09983752930003136 / ACTION 0 / REWARD -1 / Q_MAX  0.345167 / Loss  0.0141357146204
loop took 0.3078188896179199 seconds
TIMESTEP 5381 / STATE explore / EPSILON 0.09983749600003136 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.658955 / Loss  0.0132770333439
loop took 0.30504655838012695 seconds
TIMESTEP 5382 / STATE

TIMESTEP 5427 / STATE explore / EPSILON 0.09983596420003166 / ACTION 0 / REWARD 0.1 / Q_MAX  1.29081 / Loss  0.0146277425811
loop took 0.30751585960388184 seconds
TIMESTEP 5428 / STATE explore / EPSILON 0.09983593090003166 / ACTION 0 / REWARD 0.1 / Q_MAX  0.193373 / Loss  0.0102858608589
loop took 0.30089664459228516 seconds
TIMESTEP 5429 / STATE explore / EPSILON 0.09983589760003167 / ACTION 0 / REWARD 0.1 / Q_MAX  0.345187 / Loss  0.018486559391
loop took 0.2737133502960205 seconds
TIMESTEP 5430 / STATE explore / EPSILON 0.09983586430003168 / ACTION 0 / REWARD -1 / Q_MAX  0.66876 / Loss  0.0177649818361
loop took 0.2998032569885254 seconds
TIMESTEP 5431 / STATE explore / EPSILON 0.09983583100003168 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.03612 / Loss  0.0240349527448
loop took 0.30883193016052246 seconds
TIMESTEP 5432 / STATE explore / EPSILON 0.09983579770003169 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.23363 / Loss  0.01748579368
loop took 0.29138946533203125 seconds
TIMESTEP 5433 / STATE ex

TIMESTEP 5477 / STATE explore / EPSILON 0.09983429920003198 / ACTION 0 / REWARD -1 / Q_MAX  0.534961 / Loss  0.0291047692299
loop took 0.2957618236541748 seconds
TIMESTEP 5478 / STATE explore / EPSILON 0.09983426590003198 / ACTION 0 / REWARD 0.1 / Q_MAX  0.335194 / Loss  0.023518614471
loop took 0.2946629524230957 seconds
TIMESTEP 5479 / STATE explore / EPSILON 0.09983423260003199 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0965103 / Loss  0.083897985518
loop took 0.28876543045043945 seconds
TIMESTEP 5480 / STATE explore / EPSILON 0.099834199300032 / ACTION 0 / REWARD 0.1 / Q_MAX  0.488136 / Loss  0.0528724901378
loop took 0.3138270378112793 seconds
TIMESTEP 5481 / STATE explore / EPSILON 0.099834166000032 / ACTION 0 / REWARD 0.1 / Q_MAX  0.582984 / Loss  0.022604893893
loop took 0.313854455947876 seconds
TIMESTEP 5482 / STATE explore / EPSILON 0.09983413270003201 / ACTION 0 / REWARD 0.1 / Q_MAX  0.32485 / Loss  0.0837377756834
loop took 0.2871367931365967 seconds
TIMESTEP 5483 / STATE explore

TIMESTEP 5528 / STATE explore / EPSILON 0.0998326009000323 / ACTION 0 / REWARD 0.1 / Q_MAX  0.679216 / Loss  0.229462325573
loop took 0.29979753494262695 seconds
TIMESTEP 5529 / STATE explore / EPSILON 0.09983256760003231 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.746974 / Loss  0.0191364903003
loop took 0.30594730377197266 seconds
TIMESTEP 5530 / STATE explore / EPSILON 0.09983253430003232 / ACTION 0 / REWARD 0.1 / Q_MAX  0.123229 / Loss  0.00677803857252
loop took 1.0318470001220703 seconds
TIMESTEP 5531 / STATE explore / EPSILON 0.09983250100003233 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.0314144 / Loss  0.0106742996722
loop took 0.29173827171325684 seconds
TIMESTEP 5532 / STATE explore / EPSILON 0.09983246770003233 / ACTION 0 / REWARD 0.1 / Q_MAX  0.102853 / Loss  0.0283922087401
loop took 0.33789634704589844 seconds
TIMESTEP 5533 / STATE explore / EPSILON 0.09983243440003234 / ACTION 0 / REWARD 0.1 / Q_MAX  0.579067 / Loss  0.0316221415997
loop took 0.2737259864807129 seconds
TIMESTEP 5534 / 

TIMESTEP 5578 / STATE explore / EPSILON 0.09983093590003263 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.357734 / Loss  0.0268799867481
loop took 0.2927820682525635 seconds
TIMESTEP 5579 / STATE explore / EPSILON 0.09983090260003263 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.157051 / Loss  0.0251461267471
loop took 0.29681944847106934 seconds
TIMESTEP 5580 / STATE explore / EPSILON 0.09983086930003264 / ACTION 0 / REWARD 0.1 / Q_MAX  0.205174 / Loss  0.00519434083253
loop took 0.310863733291626 seconds
TIMESTEP 5581 / STATE explore / EPSILON 0.09983083600003265 / ACTION 0 / REWARD 0.1 / Q_MAX  0.193728 / Loss  0.181532993913
loop took 0.32590293884277344 seconds
TIMESTEP 5582 / STATE explore / EPSILON 0.09983080270003265 / ACTION 0 / REWARD 0.1 / Q_MAX  0.183389 / Loss  0.0174239724874
loop took 0.30795741081237793 seconds
TIMESTEP 5583 / STATE explore / EPSILON 0.09983076940003266 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0856516 / Loss  0.0293194577098
loop took 0.3101646900177002 seconds
TIMESTEP 5584 / S

TIMESTEP 5629 / STATE explore / EPSILON 0.09982923760003296 / ACTION 0 / REWARD 0.1 / Q_MAX  0.70115 / Loss  0.0151692442596
loop took 0.29979658126831055 seconds
TIMESTEP 5630 / STATE explore / EPSILON 0.09982920430003296 / ACTION 0 / REWARD 0.1 / Q_MAX  1.41785 / Loss  0.0163259487599
----------Random Action----------
loop took 0.31388068199157715 seconds
TIMESTEP 5631 / STATE explore / EPSILON 0.09982917100003297 / ACTION 0 / REWARD 0.1 / Q_MAX  0.503492 / Loss  0.0283436197788
loop took 0.30499982833862305 seconds
TIMESTEP 5632 / STATE explore / EPSILON 0.09982913770003297 / ACTION 0 / REWARD -1 / Q_MAX  -0.00376958 / Loss  0.0151355182752
loop took 0.31380701065063477 seconds
TIMESTEP 5633 / STATE explore / EPSILON 0.09982910440003298 / ACTION 0 / REWARD 0.1 / Q_MAX  0.838478 / Loss  0.0118124876171
loop took 0.2827885150909424 seconds
TIMESTEP 5634 / STATE explore / EPSILON 0.09982907110003299 / ACTION 0 / REWARD 0.1 / Q_MAX  0.410045 / Loss  0.0212654285133
loop took 0.283910751

TIMESTEP 5679 / STATE explore / EPSILON 0.09982757260003328 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.869016 / Loss  0.00715804519132
loop took 0.32546281814575195 seconds
TIMESTEP 5680 / STATE explore / EPSILON 0.09982753930003328 / ACTION 0 / REWARD 0.1 / Q_MAX  0.282599 / Loss  0.0181598849595
loop took 0.29373693466186523 seconds
TIMESTEP 5681 / STATE explore / EPSILON 0.09982750600003329 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.989904 / Loss  0.0314327776432
loop took 0.2838466167449951 seconds
TIMESTEP 5682 / STATE explore / EPSILON 0.0998274727000333 / ACTION 0 / REWARD 0.1 / Q_MAX  0.228612 / Loss  0.0347923487425
loop took 0.2991750240325928 seconds
TIMESTEP 5683 / STATE explore / EPSILON 0.0998274394000333 / ACTION 0 / REWARD 0.1 / Q_MAX  0.825807 / Loss  0.029370252043
loop took 0.2675817012786865 seconds
TIMESTEP 5684 / STATE explore / EPSILON 0.09982740610003331 / ACTION 0 / REWARD -1 / Q_MAX  -1.03771 / Loss  0.0549311451614
loop took 0.311845064163208 seconds
TIMESTEP 5685 / STATE 

TIMESTEP 5730 / STATE explore / EPSILON 0.0998258743000336 / ACTION 0 / REWARD 0.1 / Q_MAX  0.201663 / Loss  0.0125891249627
loop took 1.0280866622924805 seconds
TIMESTEP 5731 / STATE explore / EPSILON 0.09982584100003361 / ACTION 1 / REWARD 0.1 / Q_MAX  0.222158 / Loss  0.00874815136194
loop took 0.28275036811828613 seconds
TIMESTEP 5732 / STATE explore / EPSILON 0.09982580770003362 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0859028 / Loss  0.0104706473649
loop took 0.2785007953643799 seconds
TIMESTEP 5733 / STATE explore / EPSILON 0.09982577440003362 / ACTION 0 / REWARD -1 / Q_MAX  0.0364117 / Loss  0.027563476935
loop took 0.30208611488342285 seconds
TIMESTEP 5734 / STATE explore / EPSILON 0.09982574110003363 / ACTION 0 / REWARD 0.1 / Q_MAX  0.465154 / Loss  0.154206186533
loop took 0.295304536819458 seconds
TIMESTEP 5735 / STATE explore / EPSILON 0.09982570780003364 / ACTION 0 / REWARD 0.1 / Q_MAX  0.668284 / Loss  0.0110613629222
loop took 0.2898552417755127 seconds
TIMESTEP 5736 / STATE 

TIMESTEP 5780 / STATE explore / EPSILON 0.09982420930003393 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.655109 / Loss  0.0107898879796
loop took 0.31585049629211426 seconds
TIMESTEP 5781 / STATE explore / EPSILON 0.09982417600003393 / ACTION 0 / REWARD 0.1 / Q_MAX  0.434644 / Loss  0.00853285007179
loop took 0.32993650436401367 seconds
TIMESTEP 5782 / STATE explore / EPSILON 0.09982414270003394 / ACTION 0 / REWARD 0.1 / Q_MAX  0.414625 / Loss  0.0194522161037
loop took 0.29889917373657227 seconds
TIMESTEP 5783 / STATE explore / EPSILON 0.09982410940003394 / ACTION 0 / REWARD -1 / Q_MAX  -0.443436 / Loss  0.0191275514662
loop took 0.3079352378845215 seconds
TIMESTEP 5784 / STATE explore / EPSILON 0.09982407610003395 / ACTION 0 / REWARD 0.1 / Q_MAX  0.136533 / Loss  0.0182440206409
loop took 0.29979729652404785 seconds
TIMESTEP 5785 / STATE explore / EPSILON 0.09982404280003396 / ACTION 0 / REWARD 0.1 / Q_MAX  0.317813 / Loss  0.00846203230321
loop took 0.2857635021209717 seconds
TIMESTEP 5786 /

TIMESTEP 5831 / STATE explore / EPSILON 0.09982251100003425 / ACTION 0 / REWARD 0.1 / Q_MAX  0.36166 / Loss  0.00746177975088
loop took 0.29076099395751953 seconds
TIMESTEP 5832 / STATE explore / EPSILON 0.09982247770003426 / ACTION 0 / REWARD -1 / Q_MAX  0.454068 / Loss  0.0165373980999
loop took 0.2837834358215332 seconds
TIMESTEP 5833 / STATE explore / EPSILON 0.09982244440003427 / ACTION 0 / REWARD 0.1 / Q_MAX  0.37031 / Loss  0.0118091898039
loop took 0.2998082637786865 seconds
TIMESTEP 5834 / STATE explore / EPSILON 0.09982241110003427 / ACTION 0 / REWARD 0.1 / Q_MAX  0.378048 / Loss  0.0254570674151
loop took 0.31095457077026367 seconds
TIMESTEP 5835 / STATE explore / EPSILON 0.09982237780003428 / ACTION 0 / REWARD 0.1 / Q_MAX  0.353211 / Loss  0.0215300656855
loop took 0.2867598533630371 seconds
TIMESTEP 5836 / STATE explore / EPSILON 0.09982234450003429 / ACTION 0 / REWARD 0.1 / Q_MAX  0.50471 / Loss  0.0409064665437
loop took 0.28679418563842773 seconds
TIMESTEP 5837 / STATE 

TIMESTEP 5882 / STATE explore / EPSILON 0.09982081270003458 / ACTION 0 / REWARD 0.1 / Q_MAX  0.348237 / Loss  0.0223319157958
loop took 0.31133508682250977 seconds
TIMESTEP 5883 / STATE explore / EPSILON 0.09982077940003459 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.937628 / Loss  0.0405907146633
loop took 0.2897663116455078 seconds
TIMESTEP 5884 / STATE explore / EPSILON 0.0998207461000346 / ACTION 0 / REWARD 0.1 / Q_MAX  0.333584 / Loss  0.0469430834055
loop took 0.29955077171325684 seconds
TIMESTEP 5885 / STATE explore / EPSILON 0.0998207128000346 / ACTION 0 / REWARD 0.1 / Q_MAX  0.312919 / Loss  0.0277537703514
loop took 0.30633091926574707 seconds
TIMESTEP 5886 / STATE explore / EPSILON 0.0998206795000346 / ACTION 0 / REWARD -1 / Q_MAX  0.549631 / Loss  0.0320391207933
loop took 0.2987990379333496 seconds
TIMESTEP 5887 / STATE explore / EPSILON 0.09982064620003461 / ACTION 0 / REWARD 0.1 / Q_MAX  0.385901 / Loss  0.0238632243127
loop took 0.28391098976135254 seconds
TIMESTEP 5888 / STATE

TIMESTEP 5933 / STATE explore / EPSILON 0.09981911440003491 / ACTION 0 / REWARD -1 / Q_MAX  0.22902 / Loss  0.0153268761933
loop took 0.2847607135772705 seconds
TIMESTEP 5934 / STATE explore / EPSILON 0.09981908110003491 / ACTION 0 / REWARD 0.1 / Q_MAX  0.212694 / Loss  0.020218078047
loop took 0.2921566963195801 seconds
TIMESTEP 5935 / STATE explore / EPSILON 0.09981904780003492 / ACTION 0 / REWARD 0.1 / Q_MAX  0.195579 / Loss  0.0220547281206
loop took 0.2893977165222168 seconds
TIMESTEP 5936 / STATE explore / EPSILON 0.09981901450003493 / ACTION 0 / REWARD 0.1 / Q_MAX  0.250534 / Loss  0.0112805645913
loop took 0.28376030921936035 seconds
TIMESTEP 5937 / STATE explore / EPSILON 0.09981898120003493 / ACTION 0 / REWARD 0.1 / Q_MAX  0.180416 / Loss  0.015587085858
loop took 0.2837517261505127 seconds
TIMESTEP 5938 / STATE explore / EPSILON 0.09981894790003494 / ACTION 0 / REWARD 0.1 / Q_MAX  0.18165 / Loss  0.0153692755848
loop took 0.33187341690063477 seconds
TIMESTEP 5939 / STATE exp

TIMESTEP 5984 / STATE explore / EPSILON 0.09981741610003524 / ACTION 0 / REWARD 0.1 / Q_MAX  0.621057 / Loss  0.0193169303238
loop took 0.30788636207580566 seconds
TIMESTEP 5985 / STATE explore / EPSILON 0.09981738280003524 / ACTION 0 / REWARD 0.1 / Q_MAX  0.483351 / Loss  0.0240855328739
loop took 0.33058667182922363 seconds
TIMESTEP 5986 / STATE explore / EPSILON 0.09981734950003525 / ACTION 0 / REWARD 0.1 / Q_MAX  0.602613 / Loss  0.0107625639066
loop took 0.2857940196990967 seconds
TIMESTEP 5987 / STATE explore / EPSILON 0.09981731620003526 / ACTION 0 / REWARD 0.1 / Q_MAX  0.123855 / Loss  0.0214610435069
loop took 0.3075554370880127 seconds
TIMESTEP 5988 / STATE explore / EPSILON 0.09981728290003526 / ACTION 0 / REWARD 0.1 / Q_MAX  0.488886 / Loss  0.0266937278211
loop took 0.290740966796875 seconds
TIMESTEP 5989 / STATE explore / EPSILON 0.09981724960003527 / ACTION 0 / REWARD 0.1 / Q_MAX  0.471096 / Loss  0.0264258384705
loop took 0.2841835021972656 seconds
TIMESTEP 5990 / STATE

TIMESTEP 6034 / STATE explore / EPSILON 0.09981575110003556 / ACTION 0 / REWARD -1 / Q_MAX  0.443971 / Loss  0.0174655616283
loop took 0.28376030921936035 seconds
TIMESTEP 6035 / STATE explore / EPSILON 0.09981571780003556 / ACTION 0 / REWARD 0.1 / Q_MAX  0.381853 / Loss  0.0358318164945
loop took 0.2998039722442627 seconds
TIMESTEP 6036 / STATE explore / EPSILON 0.09981568450003557 / ACTION 0 / REWARD 0.1 / Q_MAX  0.701584 / Loss  0.0186898503453
loop took 0.30681324005126953 seconds
TIMESTEP 6037 / STATE explore / EPSILON 0.09981565120003558 / ACTION 0 / REWARD 0.1 / Q_MAX  0.409313 / Loss  0.0212267674506
loop took 0.2928645610809326 seconds
TIMESTEP 6038 / STATE explore / EPSILON 0.09981561790003558 / ACTION 0 / REWARD 0.1 / Q_MAX  0.181412 / Loss  0.025718588382
loop took 0.3098330497741699 seconds
TIMESTEP 6039 / STATE explore / EPSILON 0.09981558460003559 / ACTION 0 / REWARD 0.1 / Q_MAX  0.1469 / Loss  0.0159736424685
loop took 0.305450439453125 seconds
TIMESTEP 6040 / STATE exp

TIMESTEP 6084 / STATE explore / EPSILON 0.09981408610003588 / ACTION 0 / REWARD -1 / Q_MAX  0.258035 / Loss  0.0413161963224
loop took 0.30498719215393066 seconds
TIMESTEP 6085 / STATE explore / EPSILON 0.09981405280003589 / ACTION 0 / REWARD 0.1 / Q_MAX  0.368427 / Loss  0.0237087067217
loop took 0.2867603302001953 seconds
TIMESTEP 6086 / STATE explore / EPSILON 0.09981401950003589 / ACTION 0 / REWARD 0.1 / Q_MAX  0.519756 / Loss  0.0167387854308
loop took 0.31989049911499023 seconds
TIMESTEP 6087 / STATE explore / EPSILON 0.0998139862000359 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.32137 / Loss  0.0125707685947
loop took 0.30535101890563965 seconds
TIMESTEP 6088 / STATE explore / EPSILON 0.0998139529000359 / ACTION 0 / REWARD 0.1 / Q_MAX  0.255252 / Loss  0.012086475268
loop took 0.2973341941833496 seconds
TIMESTEP 6089 / STATE explore / EPSILON 0.09981391960003591 / ACTION 0 / REWARD 0.1 / Q_MAX  0.543429 / Loss  0.0140608083457
loop took 0.3168473243713379 seconds
TIMESTEP 6090 / STATE e

TIMESTEP 6135 / STATE explore / EPSILON 0.0998123878000362 / ACTION 0 / REWARD -1 / Q_MAX  0.281791 / Loss  0.0367297753692
loop took 0.29679179191589355 seconds
TIMESTEP 6136 / STATE explore / EPSILON 0.09981235450003621 / ACTION 0 / REWARD 0.1 / Q_MAX  0.392977 / Loss  0.00872313138098
loop took 0.3048841953277588 seconds
TIMESTEP 6137 / STATE explore / EPSILON 0.09981232120003622 / ACTION 0 / REWARD 0.1 / Q_MAX  0.478606 / Loss  0.0151764331385
loop took 0.28375816345214844 seconds
TIMESTEP 6138 / STATE explore / EPSILON 0.09981228790003623 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.357117 / Loss  0.0352119579911
loop took 0.28275513648986816 seconds
TIMESTEP 6139 / STATE explore / EPSILON 0.09981225460003623 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.338164 / Loss  0.0437842309475
loop took 0.31789135932922363 seconds
TIMESTEP 6140 / STATE explore / EPSILON 0.09981222130003624 / ACTION 0 / REWARD 0.1 / Q_MAX  0.242897 / Loss  0.020809378475
loop took 1.018583059310913 seconds
TIMESTEP 6141 / STA

TIMESTEP 6186 / STATE explore / EPSILON 0.09981068950003653 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.217221 / Loss  0.0191042367369
loop took 0.31501317024230957 seconds
TIMESTEP 6187 / STATE explore / EPSILON 0.09981065620003654 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0636321 / Loss  0.00811801012605
loop took 0.2834510803222656 seconds
TIMESTEP 6188 / STATE explore / EPSILON 0.09981062290003655 / ACTION 0 / REWARD 0.1 / Q_MAX  0.705886 / Loss  0.0120660569519
loop took 0.28376197814941406 seconds
TIMESTEP 6189 / STATE explore / EPSILON 0.09981058960003655 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.156511 / Loss  0.008445141837
loop took 0.2837526798248291 seconds
TIMESTEP 6190 / STATE explore / EPSILON 0.09981055630003656 / ACTION 0 / REWARD 0.1 / Q_MAX  0.366923 / Loss  0.0159357301891
loop took 0.32288384437561035 seconds
TIMESTEP 6191 / STATE explore / EPSILON 0.09981052300003657 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.228683 / Loss  0.0124335493892
loop took 0.3007943630218506 seconds
TIMESTEP 6192 

TIMESTEP 6237 / STATE explore / EPSILON 0.09980899120003686 / ACTION 0 / REWARD 0.1 / Q_MAX  0.424484 / Loss  0.0111768804491
loop took 0.3118293285369873 seconds
TIMESTEP 6238 / STATE explore / EPSILON 0.09980895790003687 / ACTION 0 / REWARD -1 / Q_MAX  0.488816 / Loss  0.00879854988307
loop took 0.331866979598999 seconds
TIMESTEP 6239 / STATE explore / EPSILON 0.09980892460003687 / ACTION 0 / REWARD 0.1 / Q_MAX  0.258605 / Loss  0.0349493138492
loop took 0.2767360210418701 seconds
TIMESTEP 6240 / STATE explore / EPSILON 0.09980889130003688 / ACTION 0 / REWARD 0.1 / Q_MAX  0.252906 / Loss  0.00802858918905
loop took 0.3138301372528076 seconds
TIMESTEP 6241 / STATE explore / EPSILON 0.09980885800003689 / ACTION 0 / REWARD 0.1 / Q_MAX  0.251411 / Loss  0.0207131654024
loop took 0.31783223152160645 seconds
TIMESTEP 6242 / STATE explore / EPSILON 0.0998088247000369 / ACTION 0 / REWARD 0.1 / Q_MAX  0.249378 / Loss  0.00809744186699
loop took 0.28174567222595215 seconds
TIMESTEP 6243 / STAT

TIMESTEP 6288 / STATE explore / EPSILON 0.09980729290003719 / ACTION 0 / REWARD 0.1 / Q_MAX  0.195834 / Loss  0.0256519578397
loop took 0.3442387580871582 seconds
TIMESTEP 6289 / STATE explore / EPSILON 0.0998072596000372 / ACTION 0 / REWARD -1 / Q_MAX  0.633429 / Loss  0.0078226197511
loop took 0.27648234367370605 seconds
TIMESTEP 6290 / STATE explore / EPSILON 0.0998072263000372 / ACTION 0 / REWARD 0.1 / Q_MAX  0.341885 / Loss  0.0216064471751
----------Random Action----------
loop took 0.3093721866607666 seconds
TIMESTEP 6291 / STATE explore / EPSILON 0.09980719300003721 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0242985 / Loss  0.0221804901958
loop took 0.30805397033691406 seconds
TIMESTEP 6292 / STATE explore / EPSILON 0.09980715970003722 / ACTION 0 / REWARD 0.1 / Q_MAX  0.640307 / Loss  0.0445887893438
loop took 0.2985551357269287 seconds
TIMESTEP 6293 / STATE explore / EPSILON 0.09980712640003722 / ACTION 0 / REWARD 0.1 / Q_MAX  0.620187 / Loss  0.00932407937944
loop took 0.284760236740

TIMESTEP 6338 / STATE explore / EPSILON 0.09980562790003751 / ACTION 0 / REWARD -1 / Q_MAX  0.117002 / Loss  0.00687544699758
loop took 0.31326961517333984 seconds
TIMESTEP 6339 / STATE explore / EPSILON 0.09980559460003752 / ACTION 0 / REWARD 0.1 / Q_MAX  0.239829 / Loss  0.0138644957915
loop took 0.3004162311553955 seconds
TIMESTEP 6340 / STATE explore / EPSILON 0.09980556130003752 / ACTION 0 / REWARD 0.1 / Q_MAX  0.624268 / Loss  0.0077011003159
loop took 1.0474233627319336 seconds
TIMESTEP 6341 / STATE explore / EPSILON 0.09980552800003753 / ACTION 1 / REWARD 0.1 / Q_MAX  -0.776509 / Loss  0.0165588390082
loop took 0.2697179317474365 seconds
TIMESTEP 6342 / STATE explore / EPSILON 0.09980549470003754 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.825468 / Loss  0.00846107676625
loop took 0.2656264305114746 seconds
TIMESTEP 6343 / STATE explore / EPSILON 0.09980546140003754 / ACTION 0 / REWARD -1 / Q_MAX  0.236007 / Loss  0.0137338647619
loop took 0.2646946907043457 seconds
TIMESTEP 6344 / STA

TIMESTEP 6388 / STATE explore / EPSILON 0.09980396290003783 / ACTION 0 / REWARD -1 / Q_MAX  0.239264 / Loss  0.0120597947389
loop took 0.3313908576965332 seconds
TIMESTEP 6389 / STATE explore / EPSILON 0.09980392960003784 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0937101 / Loss  0.022630173713
loop took 0.28174638748168945 seconds
TIMESTEP 6390 / STATE explore / EPSILON 0.09980389630003784 / ACTION 0 / REWARD 0.1 / Q_MAX  0.335379 / Loss  0.0367811024189
loop took 0.32488250732421875 seconds
TIMESTEP 6391 / STATE explore / EPSILON 0.09980386300003785 / ACTION 0 / REWARD 0.1 / Q_MAX  0.267732 / Loss  0.0313170067966
loop took 0.3129136562347412 seconds
TIMESTEP 6392 / STATE explore / EPSILON 0.09980382970003786 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0908301 / Loss  0.0230577923357
loop took 0.2937777042388916 seconds
TIMESTEP 6393 / STATE explore / EPSILON 0.09980379640003786 / ACTION 0 / REWARD 0.1 / Q_MAX  0.431342 / Loss  0.0114056877792
loop took 0.30676698684692383 seconds
TIMESTEP 6394 / ST

TIMESTEP 6439 / STATE explore / EPSILON 0.09980226460003816 / ACTION 0 / REWARD -1 / Q_MAX  -0.860942 / Loss  0.0365861952305
loop took 0.2977919578552246 seconds
TIMESTEP 6440 / STATE explore / EPSILON 0.09980223130003817 / ACTION 0 / REWARD 0.1 / Q_MAX  0.34769 / Loss  0.00859687756747
loop took 0.3018369674682617 seconds
TIMESTEP 6441 / STATE explore / EPSILON 0.09980219800003817 / ACTION 0 / REWARD 0.1 / Q_MAX  0.137116 / Loss  0.0496315658092
loop took 0.29679012298583984 seconds
TIMESTEP 6442 / STATE explore / EPSILON 0.09980216470003818 / ACTION 0 / REWARD 0.1 / Q_MAX  0.642793 / Loss  0.00824719760567
loop took 0.3088266849517822 seconds
TIMESTEP 6443 / STATE explore / EPSILON 0.09980213140003819 / ACTION 0 / REWARD 0.1 / Q_MAX  0.46017 / Loss  0.00633408455178
loop took 0.2897675037384033 seconds
TIMESTEP 6444 / STATE explore / EPSILON 0.09980209810003819 / ACTION 0 / REWARD 0.1 / Q_MAX  0.156253 / Loss  0.0109696872532
loop took 0.32787513732910156 seconds
TIMESTEP 6445 / STA

TIMESTEP 6490 / STATE explore / EPSILON 0.09980056630003849 / ACTION 0 / REWARD 0.1 / Q_MAX  0.288185 / Loss  0.0227714851499
loop took 0.3189353942871094 seconds
TIMESTEP 6491 / STATE explore / EPSILON 0.0998005330000385 / ACTION 0 / REWARD 0.1 / Q_MAX  0.74339 / Loss  0.0119880950078
loop took 0.29978489875793457 seconds
TIMESTEP 6492 / STATE explore / EPSILON 0.0998004997000385 / ACTION 0 / REWARD 0.1 / Q_MAX  0.460301 / Loss  0.0495073981583
loop took 0.2998363971710205 seconds
TIMESTEP 6493 / STATE explore / EPSILON 0.0998004664000385 / ACTION 0 / REWARD 0.1 / Q_MAX  0.401512 / Loss  0.0123000228778
loop took 0.3088245391845703 seconds
TIMESTEP 6494 / STATE explore / EPSILON 0.09980043310003851 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.297413 / Loss  0.04306108132
loop took 0.3018956184387207 seconds
TIMESTEP 6495 / STATE explore / EPSILON 0.09980039980003852 / ACTION 0 / REWARD 0.1 / Q_MAX  0.29509 / Loss  0.00721945241094
loop took 0.28818726539611816 seconds
TIMESTEP 6496 / STATE exp

TIMESTEP 6541 / STATE explore / EPSILON 0.09979886800003882 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.289391 / Loss  0.0329218618572
loop took 0.2997913360595703 seconds
TIMESTEP 6542 / STATE explore / EPSILON 0.09979883470003882 / ACTION 0 / REWARD 0.1 / Q_MAX  0.503132 / Loss  0.0265559032559
loop took 0.34292078018188477 seconds
TIMESTEP 6543 / STATE explore / EPSILON 0.09979880140003883 / ACTION 0 / REWARD 0.1 / Q_MAX  0.35972 / Loss  0.0309290252626
loop took 0.2887706756591797 seconds
TIMESTEP 6544 / STATE explore / EPSILON 0.09979876810003883 / ACTION 0 / REWARD 0.1 / Q_MAX  0.173172 / Loss  0.0204080082476
loop took 0.2837550640106201 seconds
TIMESTEP 6545 / STATE explore / EPSILON 0.09979873480003884 / ACTION 0 / REWARD -1 / Q_MAX  0.381334 / Loss  0.0169590022415
loop took 0.31258344650268555 seconds
TIMESTEP 6546 / STATE explore / EPSILON 0.09979870150003885 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.864295 / Loss  0.177356362343
loop took 0.30380845069885254 seconds
TIMESTEP 6547 / STAT

TIMESTEP 6592 / STATE explore / EPSILON 0.09979716970003914 / ACTION 0 / REWARD 0.1 / Q_MAX  0.331392 / Loss  0.00975505169481
loop took 0.2839221954345703 seconds
TIMESTEP 6593 / STATE explore / EPSILON 0.09979713640003915 / ACTION 0 / REWARD -1 / Q_MAX  0.930206 / Loss  0.0261005610228
loop took 0.3191077709197998 seconds
TIMESTEP 6594 / STATE explore / EPSILON 0.09979710310003916 / ACTION 0 / REWARD 0.1 / Q_MAX  0.223936 / Loss  0.0174468755722
loop took 0.2977874279022217 seconds
TIMESTEP 6595 / STATE explore / EPSILON 0.09979706980003916 / ACTION 0 / REWARD 0.1 / Q_MAX  0.305044 / Loss  0.0160409361124
loop took 0.3112020492553711 seconds
TIMESTEP 6596 / STATE explore / EPSILON 0.09979703650003917 / ACTION 0 / REWARD 0.1 / Q_MAX  0.360133 / Loss  0.00622526556253
loop took 0.31684255599975586 seconds
TIMESTEP 6597 / STATE explore / EPSILON 0.09979700320003917 / ACTION 0 / REWARD 0.1 / Q_MAX  0.325488 / Loss  0.0317937172949
loop took 0.30360889434814453 seconds
TIMESTEP 6598 / STA

TIMESTEP 6642 / STATE explore / EPSILON 0.09979550470003946 / ACTION 0 / REWARD 0.1 / Q_MAX  0.281652 / Loss  0.0156820099801
loop took 0.2977886199951172 seconds
TIMESTEP 6643 / STATE explore / EPSILON 0.09979547140003947 / ACTION 0 / REWARD 0.1 / Q_MAX  0.183788 / Loss  0.00493999524042
loop took 0.28376078605651855 seconds
TIMESTEP 6644 / STATE explore / EPSILON 0.09979543810003948 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.354164 / Loss  0.0120562911034
loop took 0.3096027374267578 seconds
TIMESTEP 6645 / STATE explore / EPSILON 0.09979540480003948 / ACTION 0 / REWARD 0.1 / Q_MAX  0.458518 / Loss  0.019524730742
loop took 0.2739129066467285 seconds
TIMESTEP 6646 / STATE explore / EPSILON 0.09979537150003949 / ACTION 0 / REWARD -1 / Q_MAX  0.207986 / Loss  0.0152012500912
loop took 0.299058198928833 seconds
TIMESTEP 6647 / STATE explore / EPSILON 0.0997953382000395 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0469122 / Loss  0.0233351513743
loop took 0.3068523406982422 seconds
TIMESTEP 6648 / STATE 

TIMESTEP 6692 / STATE explore / EPSILON 0.09979383970003979 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292637 / Loss  0.0141429249197
loop took 0.33188509941101074 seconds
TIMESTEP 6693 / STATE explore / EPSILON 0.09979380640003979 / ACTION 0 / REWARD 0.1 / Q_MAX  0.402231 / Loss  0.0116453692317
loop took 0.31555676460266113 seconds
TIMESTEP 6694 / STATE explore / EPSILON 0.0997937731000398 / ACTION 0 / REWARD 0.1 / Q_MAX  0.327759 / Loss  0.00595068372786
loop took 0.2837526798248291 seconds
TIMESTEP 6695 / STATE explore / EPSILON 0.0997937398000398 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.559309 / Loss  0.021792165935
loop took 0.28376102447509766 seconds
TIMESTEP 6696 / STATE explore / EPSILON 0.09979370650003981 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0532685 / Loss  0.0109278466552
loop took 0.28375244140625 seconds
TIMESTEP 6697 / STATE explore / EPSILON 0.09979367320003982 / ACTION 0 / REWARD 0.1 / Q_MAX  0.10936 / Loss  0.0201970282942
loop took 0.29742860794067383 seconds
TIMESTEP 6698 / STAT

TIMESTEP 6743 / STATE explore / EPSILON 0.09979214140004011 / ACTION 0 / REWARD 0.1 / Q_MAX  0.367587 / Loss  0.0138621097431
loop took 0.27326369285583496 seconds
TIMESTEP 6744 / STATE explore / EPSILON 0.09979210810004012 / ACTION 0 / REWARD -1 / Q_MAX  0.202773 / Loss  0.0419519990683
loop took 0.2827455997467041 seconds
TIMESTEP 6745 / STATE explore / EPSILON 0.09979207480004013 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.107138 / Loss  0.0259485132992
loop took 0.28475356101989746 seconds
TIMESTEP 6746 / STATE explore / EPSILON 0.09979204150004013 / ACTION 0 / REWARD 0.1 / Q_MAX  0.271822 / Loss  0.0239575989544
loop took 0.28524208068847656 seconds
TIMESTEP 6747 / STATE explore / EPSILON 0.09979200820004014 / ACTION 0 / REWARD 0.1 / Q_MAX  0.209551 / Loss  0.00399287929758
loop took 0.3108174800872803 seconds
TIMESTEP 6748 / STATE explore / EPSILON 0.09979197490004015 / ACTION 0 / REWARD 0.1 / Q_MAX  0.216346 / Loss  0.0238464809954
loop took 0.2727229595184326 seconds
TIMESTEP 6749 / ST

TIMESTEP 6793 / STATE explore / EPSILON 0.09979047640004043 / ACTION 0 / REWARD -1 / Q_MAX  0.544754 / Loss  0.0147478338331
loop took 0.287506103515625 seconds
TIMESTEP 6794 / STATE explore / EPSILON 0.09979044310004044 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.840379 / Loss  0.0428731516004
loop took 0.2991185188293457 seconds
TIMESTEP 6795 / STATE explore / EPSILON 0.09979040980004045 / ACTION 0 / REWARD 0.1 / Q_MAX  0.571152 / Loss  0.128638088703
loop took 0.29955315589904785 seconds
TIMESTEP 6796 / STATE explore / EPSILON 0.09979037650004045 / ACTION 0 / REWARD 0.1 / Q_MAX  0.291788 / Loss  0.0227830354124
loop took 0.2837522029876709 seconds
TIMESTEP 6797 / STATE explore / EPSILON 0.09979034320004046 / ACTION 0 / REWARD 0.1 / Q_MAX  0.281947 / Loss  0.022677840665
loop took 0.2838006019592285 seconds
TIMESTEP 6798 / STATE explore / EPSILON 0.09979030990004047 / ACTION 0 / REWARD 0.1 / Q_MAX  0.270436 / Loss  0.00934725999832
loop took 0.29745006561279297 seconds
TIMESTEP 6799 / STATE 

TIMESTEP 6844 / STATE explore / EPSILON 0.09978877810004076 / ACTION 0 / REWARD 0.1 / Q_MAX  0.613786 / Loss  0.010261528194
loop took 0.2867615222930908 seconds
TIMESTEP 6845 / STATE explore / EPSILON 0.09978874480004077 / ACTION 0 / REWARD 0.1 / Q_MAX  0.36462 / Loss  0.0234344378114
loop took 0.31155848503112793 seconds
TIMESTEP 6846 / STATE explore / EPSILON 0.09978871150004077 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0657586 / Loss  0.0112972920761
loop took 0.28376007080078125 seconds
TIMESTEP 6847 / STATE explore / EPSILON 0.09978867820004078 / ACTION 0 / REWARD 0.1 / Q_MAX  0.338207 / Loss  0.0291869081557
loop took 0.28275275230407715 seconds
TIMESTEP 6848 / STATE explore / EPSILON 0.09978864490004079 / ACTION 0 / REWARD 0.1 / Q_MAX  1.14565 / Loss  0.0129657443613
loop took 0.29979729652404785 seconds
TIMESTEP 6849 / STATE explore / EPSILON 0.0997886116000408 / ACTION 0 / REWARD 0.1 / Q_MAX  0.323339 / Loss  0.0114806238562
loop took 0.2837495803833008 seconds
TIMESTEP 6850 / STAT

TIMESTEP 6895 / STATE explore / EPSILON 0.09978707980004109 / ACTION 0 / REWARD 0.1 / Q_MAX  0.415 / Loss  0.00708871986717
loop took 0.30080628395080566 seconds
TIMESTEP 6896 / STATE explore / EPSILON 0.0997870465000411 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.337155 / Loss  0.0115484287962
loop took 0.3168768882751465 seconds
TIMESTEP 6897 / STATE explore / EPSILON 0.0997870132000411 / ACTION 0 / REWARD 0.1 / Q_MAX  0.356911 / Loss  0.00751981325448
loop took 0.3109166622161865 seconds
TIMESTEP 6898 / STATE explore / EPSILON 0.09978697990004111 / ACTION 0 / REWARD 0.1 / Q_MAX  0.249459 / Loss  0.0048980419524
loop took 0.31780076026916504 seconds
TIMESTEP 6899 / STATE explore / EPSILON 0.09978694660004112 / ACTION 0 / REWARD 0.1 / Q_MAX  0.25285 / Loss  0.0154727436602
loop took 0.3168830871582031 seconds
TIMESTEP 6900 / STATE explore / EPSILON 0.09978691330004112 / ACTION 0 / REWARD 0.1 / Q_MAX  0.632909 / Loss  0.0112111130729
loop took 0.31017446517944336 seconds
TIMESTEP 6901 / STATE 

TIMESTEP 6946 / STATE explore / EPSILON 0.09978538150004142 / ACTION 0 / REWARD 0.1 / Q_MAX  0.479404 / Loss  0.0190772805363
loop took 0.309734582901001 seconds
TIMESTEP 6947 / STATE explore / EPSILON 0.09978534820004142 / ACTION 0 / REWARD 0.1 / Q_MAX  0.521361 / Loss  0.00955754332244
loop took 0.3158426284790039 seconds
TIMESTEP 6948 / STATE explore / EPSILON 0.09978531490004143 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.83104 / Loss  0.0157323107123
loop took 0.315335750579834 seconds
TIMESTEP 6949 / STATE explore / EPSILON 0.09978528160004144 / ACTION 0 / REWARD 0.1 / Q_MAX  0.344332 / Loss  0.0344896875322
loop took 0.2742600440979004 seconds
TIMESTEP 6950 / STATE explore / EPSILON 0.09978524830004144 / ACTION 0 / REWARD -1 / Q_MAX  0.286143 / Loss  0.0131127415225
loop took 0.2992115020751953 seconds
TIMESTEP 6951 / STATE explore / EPSILON 0.09978521500004145 / ACTION 0 / REWARD 0.1 / Q_MAX  0.289365 / Loss  0.102811813354
loop took 0.2995903491973877 seconds
TIMESTEP 6952 / STATE exp

TIMESTEP 6997 / STATE explore / EPSILON 0.09978368320004175 / ACTION 0 / REWARD -1 / Q_MAX  0.137102 / Loss  0.00720131536946
loop took 0.30942559242248535 seconds
TIMESTEP 6998 / STATE explore / EPSILON 0.09978364990004175 / ACTION 0 / REWARD 0.1 / Q_MAX  0.305787 / Loss  0.00679016951472
loop took 0.3048076629638672 seconds
TIMESTEP 6999 / STATE explore / EPSILON 0.09978361660004176 / ACTION 0 / REWARD 0.1 / Q_MAX  0.438446 / Loss  0.0343526788056
loop took 0.3005056381225586 seconds
Now we save model
TIMESTEP 7000 / STATE explore / EPSILON 0.09978358330004176 / ACTION 0 / REWARD 0.1 / Q_MAX  0.152856 / Loss  0.0122403819114
loop took 0.3448820114135742 seconds
TIMESTEP 7001 / STATE explore / EPSILON 0.09978355000004177 / ACTION 0 / REWARD 0.1 / Q_MAX  0.379763 / Loss  0.0132637470961
loop took 0.3133862018585205 seconds
TIMESTEP 7002 / STATE explore / EPSILON 0.09978351670004178 / ACTION 0 / REWARD 0.1 / Q_MAX  0.442502 / Loss  0.018766913563
loop took 0.30629539489746094 seconds
TI

TIMESTEP 7048 / STATE explore / EPSILON 0.09978198490004207 / ACTION 0 / REWARD 0.1 / Q_MAX  0.228097 / Loss  0.00892784073949
loop took 0.3319559097290039 seconds
TIMESTEP 7049 / STATE explore / EPSILON 0.09978195160004208 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.649037 / Loss  0.0137323513627
loop took 0.30773258209228516 seconds
TIMESTEP 7050 / STATE explore / EPSILON 0.09978191830004209 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.092139 / Loss  0.00759801035747
loop took 0.3003063201904297 seconds
TIMESTEP 7051 / STATE explore / EPSILON 0.09978188500004209 / ACTION 0 / REWARD 0.1 / Q_MAX  0.25875 / Loss  0.0608686245978
loop took 0.3108811378479004 seconds
TIMESTEP 7052 / STATE explore / EPSILON 0.0997818517000421 / ACTION 0 / REWARD 0.1 / Q_MAX  0.344433 / Loss  0.0236736945808
loop took 0.2882065773010254 seconds
TIMESTEP 7053 / STATE explore / EPSILON 0.0997818184000421 / ACTION 0 / REWARD -1 / Q_MAX  0.396386 / Loss  0.0426977425814
loop took 0.31885838508605957 seconds
TIMESTEP 7054 / STAT

TIMESTEP 7099 / STATE explore / EPSILON 0.0997802866000424 / ACTION 0 / REWARD 0.1 / Q_MAX  0.703648 / Loss  0.0713942274451
loop took 0.30080246925354004 seconds
TIMESTEP 7100 / STATE explore / EPSILON 0.0997802533000424 / ACTION 0 / REWARD 0.1 / Q_MAX  0.608227 / Loss  0.017158318311
loop took 0.29979705810546875 seconds
TIMESTEP 7101 / STATE explore / EPSILON 0.09978022000004241 / ACTION 0 / REWARD 0.1 / Q_MAX  0.330778 / Loss  0.0490416809916
loop took 0.3248932361602783 seconds
TIMESTEP 7102 / STATE explore / EPSILON 0.09978018670004242 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.853908 / Loss  0.0116727314889
loop took 0.2907681465148926 seconds
TIMESTEP 7103 / STATE explore / EPSILON 0.09978015340004243 / ACTION 0 / REWARD 0.1 / Q_MAX  0.277666 / Loss  0.0285064186901
loop took 0.3168487548828125 seconds
TIMESTEP 7104 / STATE explore / EPSILON 0.09978012010004243 / ACTION 0 / REWARD 0.1 / Q_MAX  0.681149 / Loss  0.0282976999879
loop took 0.3132333755493164 seconds
TIMESTEP 7105 / STATE 

TIMESTEP 7150 / STATE explore / EPSILON 0.09977858830004273 / ACTION 0 / REWARD -1 / Q_MAX  0.147879 / Loss  0.00685907714069
loop took 0.3279712200164795 seconds
TIMESTEP 7151 / STATE explore / EPSILON 0.09977855500004273 / ACTION 0 / REWARD 0.1 / Q_MAX  0.139272 / Loss  0.00633572228253
loop took 0.2998356819152832 seconds
TIMESTEP 7152 / STATE explore / EPSILON 0.09977852170004274 / ACTION 0 / REWARD 0.1 / Q_MAX  0.134017 / Loss  0.0118900900707
loop took 0.31485939025878906 seconds
TIMESTEP 7153 / STATE explore / EPSILON 0.09977848840004275 / ACTION 0 / REWARD 0.1 / Q_MAX  0.132591 / Loss  0.0134167224169
loop took 0.29077601432800293 seconds
TIMESTEP 7154 / STATE explore / EPSILON 0.09977845510004275 / ACTION 0 / REWARD 0.1 / Q_MAX  0.135367 / Loss  0.0119326021522
loop took 0.3096737861633301 seconds
TIMESTEP 7155 / STATE explore / EPSILON 0.09977842180004276 / ACTION 0 / REWARD 0.1 / Q_MAX  1.70108 / Loss  0.0183711033314
loop took 0.2897958755493164 seconds
TIMESTEP 7156 / STAT

TIMESTEP 7201 / STATE explore / EPSILON 0.09977689000004306 / ACTION 0 / REWARD 0.1 / Q_MAX  0.539301 / Loss  0.0069112512283
loop took 0.32227182388305664 seconds
TIMESTEP 7202 / STATE explore / EPSILON 0.09977685670004306 / ACTION 0 / REWARD 0.1 / Q_MAX  0.108954 / Loss  0.00574385607615
loop took 0.3113088607788086 seconds
TIMESTEP 7203 / STATE explore / EPSILON 0.09977682340004307 / ACTION 0 / REWARD 0.1 / Q_MAX  0.110379 / Loss  0.021851144731
loop took 0.2867598533630371 seconds
TIMESTEP 7204 / STATE explore / EPSILON 0.09977679010004308 / ACTION 0 / REWARD 0.1 / Q_MAX  0.118963 / Loss  0.0104621648788
loop took 0.2847607135772705 seconds
TIMESTEP 7205 / STATE explore / EPSILON 0.09977675680004308 / ACTION 0 / REWARD 0.1 / Q_MAX  0.133991 / Loss  0.0120150055736
loop took 0.30280494689941406 seconds
TIMESTEP 7206 / STATE explore / EPSILON 0.09977672350004309 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.928561 / Loss  0.0156566053629
loop took 0.29683780670166016 seconds
TIMESTEP 7207 / ST

TIMESTEP 7252 / STATE explore / EPSILON 0.09977519170004338 / ACTION 0 / REWARD 0.1 / Q_MAX  0.191689 / Loss  0.0072023132816
loop took 0.3138387203216553 seconds
TIMESTEP 7253 / STATE explore / EPSILON 0.09977515840004339 / ACTION 0 / REWARD 0.1 / Q_MAX  0.329684 / Loss  0.0208149608225
loop took 0.2977907657623291 seconds
TIMESTEP 7254 / STATE explore / EPSILON 0.0997751251000434 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.121066 / Loss  0.0109090330079
loop took 0.3055264949798584 seconds
TIMESTEP 7255 / STATE explore / EPSILON 0.0997750918000434 / ACTION 0 / REWARD 0.1 / Q_MAX  0.169061 / Loss  0.0244381353259
loop took 0.310302734375 seconds
TIMESTEP 7256 / STATE explore / EPSILON 0.09977505850004341 / ACTION 0 / REWARD 0.1 / Q_MAX  0.830791 / Loss  0.0510771721601
loop took 0.3058042526245117 seconds
TIMESTEP 7257 / STATE explore / EPSILON 0.09977502520004342 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.45886 / Loss  0.0294754821807
loop took 0.2847604751586914 seconds
TIMESTEP 7258 / STATE explo

TIMESTEP 7303 / STATE explore / EPSILON 0.09977349340004371 / ACTION 0 / REWARD 0.1 / Q_MAX  0.413541 / Loss  0.0139674050733
loop took 0.3090188503265381 seconds
TIMESTEP 7304 / STATE explore / EPSILON 0.09977346010004372 / ACTION 0 / REWARD 0.1 / Q_MAX  0.101814 / Loss  0.0116469860077
loop took 0.30692505836486816 seconds
TIMESTEP 7305 / STATE explore / EPSILON 0.09977342680004372 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.696659 / Loss  0.0201167166233
loop took 0.28375935554504395 seconds
TIMESTEP 7306 / STATE explore / EPSILON 0.09977339350004373 / ACTION 0 / REWARD 0.1 / Q_MAX  0.339035 / Loss  0.0116701647639
loop took 0.3166923522949219 seconds
TIMESTEP 7307 / STATE explore / EPSILON 0.09977336020004374 / ACTION 0 / REWARD -1 / Q_MAX  0.0584965 / Loss  0.0188299361616
loop took 0.3108255863189697 seconds
TIMESTEP 7308 / STATE explore / EPSILON 0.09977332690004374 / ACTION 0 / REWARD 0.1 / Q_MAX  0.559907 / Loss  0.0151073168963
loop took 0.32030296325683594 seconds
TIMESTEP 7309 / ST

TIMESTEP 7354 / STATE explore / EPSILON 0.09977179510004404 / ACTION 0 / REWARD 0.1 / Q_MAX  0.36455 / Loss  0.0144543666393
loop took 0.26503610610961914 seconds
TIMESTEP 7355 / STATE explore / EPSILON 0.09977176180004405 / ACTION 0 / REWARD -1 / Q_MAX  -0.611466 / Loss  0.0147931789979
loop took 0.28296661376953125 seconds
TIMESTEP 7356 / STATE explore / EPSILON 0.09977172850004405 / ACTION 0 / REWARD 0.1 / Q_MAX  0.491162 / Loss  0.0111911129206
loop took 0.28475356101989746 seconds
TIMESTEP 7357 / STATE explore / EPSILON 0.09977169520004406 / ACTION 0 / REWARD 0.1 / Q_MAX  0.70878 / Loss  0.0119092827663
loop took 0.2981855869293213 seconds
TIMESTEP 7358 / STATE explore / EPSILON 0.09977166190004406 / ACTION 0 / REWARD 0.1 / Q_MAX  0.718697 / Loss  0.0246601104736
loop took 0.31383824348449707 seconds
TIMESTEP 7359 / STATE explore / EPSILON 0.09977162860004407 / ACTION 0 / REWARD 0.1 / Q_MAX  0.25675 / Loss  0.00627266429365
loop took 0.2877662181854248 seconds
TIMESTEP 7360 / STAT

TIMESTEP 7405 / STATE explore / EPSILON 0.09977009680004437 / ACTION 0 / REWARD 0.1 / Q_MAX  0.374668 / Loss  0.021086871624
loop took 0.28876781463623047 seconds
TIMESTEP 7406 / STATE explore / EPSILON 0.09977006350004437 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0992163 / Loss  0.0231833532453
loop took 0.29361915588378906 seconds
TIMESTEP 7407 / STATE explore / EPSILON 0.09977003020004438 / ACTION 0 / REWARD -1 / Q_MAX  1.1338 / Loss  0.00545823760331
loop took 0.2887091636657715 seconds
TIMESTEP 7408 / STATE explore / EPSILON 0.09976999690004439 / ACTION 0 / REWARD 0.1 / Q_MAX  0.286059 / Loss  0.00659280922264
loop took 0.28275299072265625 seconds
TIMESTEP 7409 / STATE explore / EPSILON 0.09976996360004439 / ACTION 0 / REWARD 0.1 / Q_MAX  0.603301 / Loss  0.0187458582222
loop took 0.2837541103363037 seconds
TIMESTEP 7410 / STATE explore / EPSILON 0.0997699303000444 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.166473 / Loss  0.0150769585744
loop took 0.2992558479309082 seconds
TIMESTEP 7411 / STAT

TIMESTEP 7455 / STATE explore / EPSILON 0.09976843180004469 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.479889 / Loss  0.00818498712033
loop took 0.31986260414123535 seconds
TIMESTEP 7456 / STATE explore / EPSILON 0.0997683985000447 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292136 / Loss  0.0121660968289
loop took 0.31172728538513184 seconds
TIMESTEP 7457 / STATE explore / EPSILON 0.0997683652000447 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0386617 / Loss  0.0912872850895
loop took 0.3338632583618164 seconds
TIMESTEP 7458 / STATE explore / EPSILON 0.09976833190004471 / ACTION 0 / REWARD 0.1 / Q_MAX  0.445175 / Loss  0.0198047533631
loop took 0.30469274520874023 seconds
TIMESTEP 7459 / STATE explore / EPSILON 0.09976829860004471 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.722633 / Loss  0.0197480805218
loop took 0.2947862148284912 seconds
TIMESTEP 7460 / STATE explore / EPSILON 0.09976826530004472 / ACTION 0 / REWARD 0.1 / Q_MAX  0.27232 / Loss  0.0176958013326
loop took 0.2994842529296875 seconds
TIMESTEP 7461 / ST

TIMESTEP 7506 / STATE explore / EPSILON 0.09976673350004502 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.289753 / Loss  0.0194309465587
loop took 0.3018004894256592 seconds
TIMESTEP 7507 / STATE explore / EPSILON 0.09976670020004502 / ACTION 0 / REWARD 0.1 / Q_MAX  4.59627 / Loss  0.504252612591
loop took 0.29678773880004883 seconds
TIMESTEP 7508 / STATE explore / EPSILON 0.09976666690004503 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292628 / Loss  0.00896626897156
loop took 0.309312105178833 seconds
TIMESTEP 7509 / STATE explore / EPSILON 0.09976663360004503 / ACTION 0 / REWARD 0.1 / Q_MAX  0.588293 / Loss  0.0421912223101
loop took 0.30739378929138184 seconds
TIMESTEP 7510 / STATE explore / EPSILON 0.09976660030004504 / ACTION 0 / REWARD -1 / Q_MAX  -0.79478 / Loss  0.0105050988495
----------Random Action----------
loop took 0.29277515411376953 seconds
TIMESTEP 7511 / STATE explore / EPSILON 0.09976656700004505 / ACTION 0 / REWARD 0.1 / Q_MAX  0.689864 / Loss  0.0726873129606
loop took 0.319736957550

TIMESTEP 7556 / STATE explore / EPSILON 0.09976506850004534 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0466582 / Loss  0.0126343583688
loop took 0.3148379325866699 seconds
TIMESTEP 7557 / STATE explore / EPSILON 0.09976503520004534 / ACTION 0 / REWARD 0.1 / Q_MAX  0.117481 / Loss  0.0172771774232
loop took 0.29788756370544434 seconds
TIMESTEP 7558 / STATE explore / EPSILON 0.09976500190004535 / ACTION 0 / REWARD 0.1 / Q_MAX  0.148151 / Loss  0.0267981551588
loop took 0.30884647369384766 seconds
TIMESTEP 7559 / STATE explore / EPSILON 0.09976496860004536 / ACTION 0 / REWARD 0.1 / Q_MAX  0.406675 / Loss  0.00490584038198
loop took 0.2998385429382324 seconds
TIMESTEP 7560 / STATE explore / EPSILON 0.09976493530004536 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.932339 / Loss  0.0137258926407
loop took 1.0612084865570068 seconds
TIMESTEP 7561 / STATE explore / EPSILON 0.09976490200004537 / ACTION 1 / REWARD 0.1 / Q_MAX  0.101134 / Loss  0.0151900080964
loop took 0.2655766010284424 seconds
TIMESTEP 7562 / S

TIMESTEP 7607 / STATE explore / EPSILON 0.09976337020004566 / ACTION 0 / REWARD 0.1 / Q_MAX  0.438462 / Loss  0.0210750177503
loop took 0.29380345344543457 seconds
TIMESTEP 7608 / STATE explore / EPSILON 0.09976333690004567 / ACTION 0 / REWARD 0.1 / Q_MAX  0.149721 / Loss  0.00911407545209
loop took 0.2902247905731201 seconds
TIMESTEP 7609 / STATE explore / EPSILON 0.09976330360004568 / ACTION 0 / REWARD -1 / Q_MAX  0.316446 / Loss  0.01421607472
loop took 0.29257750511169434 seconds
TIMESTEP 7610 / STATE explore / EPSILON 0.09976327030004568 / ACTION 0 / REWARD 0.1 / Q_MAX  0.178385 / Loss  0.0156911350787
loop took 0.3260061740875244 seconds
TIMESTEP 7611 / STATE explore / EPSILON 0.09976323700004569 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.035805 / Loss  0.0300930812955
loop took 0.3123047351837158 seconds
TIMESTEP 7612 / STATE explore / EPSILON 0.0997632037000457 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0258283 / Loss  0.00620969664305
loop took 0.2930142879486084 seconds
TIMESTEP 7613 / STAT

TIMESTEP 7657 / STATE explore / EPSILON 0.09976170520004599 / ACTION 0 / REWARD 0.1 / Q_MAX  1.11416 / Loss  0.00811757519841
loop took 0.2747306823730469 seconds
TIMESTEP 7658 / STATE explore / EPSILON 0.09976167190004599 / ACTION 0 / REWARD 0.1 / Q_MAX  0.664488 / Loss  0.0531831867993
loop took 0.28275346755981445 seconds
TIMESTEP 7659 / STATE explore / EPSILON 0.099761638600046 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.687556 / Loss  0.0448707416654
loop took 0.29691052436828613 seconds
TIMESTEP 7660 / STATE explore / EPSILON 0.099761605300046 / ACTION 0 / REWARD 0.1 / Q_MAX  0.611394 / Loss  0.0177592206746
loop took 1.0418951511383057 seconds
TIMESTEP 7661 / STATE explore / EPSILON 0.09976157200004601 / ACTION 1 / REWARD -1 / Q_MAX  0.579495 / Loss  0.011092110537
loop took 0.275646448135376 seconds
TIMESTEP 7662 / STATE explore / EPSILON 0.09976153870004602 / ACTION 0 / REWARD 0.1 / Q_MAX  0.586564 / Loss  0.0406439900398
loop took 0.26470398902893066 seconds
TIMESTEP 7663 / STATE exp

TIMESTEP 7708 / STATE explore / EPSILON 0.09976000690004631 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.170895 / Loss  0.0175379831344
loop took 0.29886722564697266 seconds
TIMESTEP 7709 / STATE explore / EPSILON 0.09975997360004632 / ACTION 0 / REWARD 0.1 / Q_MAX  0.246109 / Loss  0.0202726125717
loop took 0.2985398769378662 seconds
TIMESTEP 7710 / STATE explore / EPSILON 0.09975994030004633 / ACTION 0 / REWARD 0.1 / Q_MAX  0.719121 / Loss  0.0104241706431
loop took 1.0460789203643799 seconds
TIMESTEP 7711 / STATE explore / EPSILON 0.09975990700004633 / ACTION 1 / REWARD -1 / Q_MAX  0.58429 / Loss  0.0295459460467
loop took 0.2716407775878906 seconds
TIMESTEP 7712 / STATE explore / EPSILON 0.09975987370004634 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0187078 / Loss  0.00892504304647
loop took 0.30776286125183105 seconds
TIMESTEP 7713 / STATE explore / EPSILON 0.09975984040004635 / ACTION 0 / REWARD 0.1 / Q_MAX  0.65741 / Loss  0.0211775470525
loop took 0.27172017097473145 seconds
TIMESTEP 7714 / ST

TIMESTEP 7758 / STATE explore / EPSILON 0.09975834190004663 / ACTION 0 / REWARD 0.1 / Q_MAX  0.251973 / Loss  0.0452207624912
loop took 0.28275561332702637 seconds
TIMESTEP 7759 / STATE explore / EPSILON 0.09975830860004664 / ACTION 0 / REWARD 0.1 / Q_MAX  0.537587 / Loss  0.0196942351758
loop took 0.30702662467956543 seconds
TIMESTEP 7760 / STATE explore / EPSILON 0.09975827530004665 / ACTION 0 / REWARD -1 / Q_MAX  0.275693 / Loss  0.0122498916462
loop took 0.31284141540527344 seconds
TIMESTEP 7761 / STATE explore / EPSILON 0.09975824200004665 / ACTION 0 / REWARD 0.1 / Q_MAX  0.338538 / Loss  0.0229798164219
loop took 0.2777059078216553 seconds
TIMESTEP 7762 / STATE explore / EPSILON 0.09975820870004666 / ACTION 0 / REWARD 0.1 / Q_MAX  0.634611 / Loss  0.0137254912406
loop took 0.3178420066833496 seconds
TIMESTEP 7763 / STATE explore / EPSILON 0.09975817540004667 / ACTION 0 / REWARD 0.1 / Q_MAX  0.286051 / Loss  0.00694997608662
loop took 0.28375840187072754 seconds
TIMESTEP 7764 / ST

TIMESTEP 7808 / STATE explore / EPSILON 0.09975667690004696 / ACTION 0 / REWARD 0.1 / Q_MAX  0.257373 / Loss  0.0121087413281
loop took 0.27575135231018066 seconds
TIMESTEP 7809 / STATE explore / EPSILON 0.09975664360004696 / ACTION 0 / REWARD -1 / Q_MAX  0.269862 / Loss  0.0220609959215
loop took 0.2978482246398926 seconds
TIMESTEP 7810 / STATE explore / EPSILON 0.09975661030004697 / ACTION 0 / REWARD 0.1 / Q_MAX  0.591365 / Loss  0.043187789619
loop took 0.3029050827026367 seconds
TIMESTEP 7811 / STATE explore / EPSILON 0.09975657700004698 / ACTION 0 / REWARD 0.1 / Q_MAX  0.29008 / Loss  0.0184167772532
loop took 0.2968251705169678 seconds
TIMESTEP 7812 / STATE explore / EPSILON 0.09975654370004698 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.522675 / Loss  0.0215617716312
loop took 0.304807186126709 seconds
TIMESTEP 7813 / STATE explore / EPSILON 0.09975651040004699 / ACTION 0 / REWARD 0.1 / Q_MAX  0.22811 / Loss  0.01363199763
loop took 0.2847614288330078 seconds
TIMESTEP 7814 / STATE explo

TIMESTEP 7859 / STATE explore / EPSILON 0.09975497860004728 / ACTION 0 / REWARD -1 / Q_MAX  0.275026 / Loss  0.0167440269142
loop took 0.2907724380493164 seconds
TIMESTEP 7860 / STATE explore / EPSILON 0.09975494530004729 / ACTION 0 / REWARD 0.1 / Q_MAX  0.634122 / Loss  0.0214675385505
loop took 0.3008308410644531 seconds
TIMESTEP 7861 / STATE explore / EPSILON 0.0997549120000473 / ACTION 0 / REWARD 0.1 / Q_MAX  0.509552 / Loss  0.0255769658834
loop took 0.31028270721435547 seconds
TIMESTEP 7862 / STATE explore / EPSILON 0.0997548787000473 / ACTION 0 / REWARD 0.1 / Q_MAX  1.00496 / Loss  0.0107971867546
loop took 0.30480241775512695 seconds
TIMESTEP 7863 / STATE explore / EPSILON 0.09975484540004731 / ACTION 0 / REWARD 0.1 / Q_MAX  0.29989 / Loss  0.00464616436511
loop took 0.3028144836425781 seconds
TIMESTEP 7864 / STATE explore / EPSILON 0.09975481210004732 / ACTION 0 / REWARD 0.1 / Q_MAX  0.169411 / Loss  0.00906449370086
loop took 0.29790425300598145 seconds
TIMESTEP 7865 / STATE 

TIMESTEP 7909 / STATE explore / EPSILON 0.0997533136000476 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.818747 / Loss  0.0110308565199
loop took 0.29785871505737305 seconds
TIMESTEP 7910 / STATE explore / EPSILON 0.09975328030004761 / ACTION 0 / REWARD 0.1 / Q_MAX  0.153124 / Loss  0.0228157062083
loop took 1.0473182201385498 seconds
TIMESTEP 7911 / STATE explore / EPSILON 0.09975324700004762 / ACTION 1 / REWARD 0.1 / Q_MAX  0.679547 / Loss  0.0180620737374
loop took 0.27207088470458984 seconds
TIMESTEP 7912 / STATE explore / EPSILON 0.09975321370004762 / ACTION 0 / REWARD 0.1 / Q_MAX  0.152408 / Loss  0.0198004618287
loop took 0.2655634880065918 seconds
TIMESTEP 7913 / STATE explore / EPSILON 0.09975318040004763 / ACTION 0 / REWARD -1 / Q_MAX  0.221762 / Loss  0.0188619121909
loop took 0.2667083740234375 seconds
TIMESTEP 7914 / STATE explore / EPSILON 0.09975314710004764 / ACTION 0 / REWARD 0.1 / Q_MAX  0.186302 / Loss  0.0508597455919
loop took 0.2677187919616699 seconds
TIMESTEP 7915 / STATE

TIMESTEP 7959 / STATE explore / EPSILON 0.09975164860004793 / ACTION 0 / REWARD 0.1 / Q_MAX  0.175574 / Loss  0.0165462493896
loop took 0.30181026458740234 seconds
TIMESTEP 7960 / STATE explore / EPSILON 0.09975161530004793 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0554801 / Loss  0.0179050862789
loop took 1.0117690563201904 seconds
TIMESTEP 7961 / STATE explore / EPSILON 0.09975158200004794 / ACTION 1 / REWARD -1 / Q_MAX  0.464292 / Loss  0.00890413112938
loop took 0.2698066234588623 seconds
TIMESTEP 7962 / STATE explore / EPSILON 0.09975154870004795 / ACTION 0 / REWARD 0.1 / Q_MAX  0.26719 / Loss  0.0154820065945
loop took 0.28566479682922363 seconds
TIMESTEP 7963 / STATE explore / EPSILON 0.09975151540004795 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.917362 / Loss  0.0120779946446
loop took 0.310837984085083 seconds
TIMESTEP 7964 / STATE explore / EPSILON 0.09975148210004796 / ACTION 0 / REWARD 0.1 / Q_MAX  0.294329 / Loss  0.0198468770832
loop took 0.29555630683898926 seconds
TIMESTEP 7965 / STA

TIMESTEP 8009 / STATE explore / EPSILON 0.09974998360004825 / ACTION 0 / REWARD 0.1 / Q_MAX  0.113763 / Loss  0.0115843378007
loop took 0.30481624603271484 seconds
TIMESTEP 8010 / STATE explore / EPSILON 0.09974995030004825 / ACTION 0 / REWARD 0.1 / Q_MAX  0.3555 / Loss  0.00912295654416
loop took 0.2954554557800293 seconds
TIMESTEP 8011 / STATE explore / EPSILON 0.09974991700004826 / ACTION 0 / REWARD -1 / Q_MAX  0.0863873 / Loss  0.0147852720693
loop took 0.29982662200927734 seconds
TIMESTEP 8012 / STATE explore / EPSILON 0.09974988370004827 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.623978 / Loss  0.0090581420809
loop took 0.30884790420532227 seconds
TIMESTEP 8013 / STATE explore / EPSILON 0.09974985040004827 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.07506 / Loss  0.0108328396454
loop took 0.305849552154541 seconds
TIMESTEP 8014 / STATE explore / EPSILON 0.09974981710004828 / ACTION 0 / REWARD 0.1 / Q_MAX  0.40712 / Loss  0.0100400252268
loop took 0.3088498115539551 seconds
TIMESTEP 8015 / STATE

TIMESTEP 8059 / STATE explore / EPSILON 0.09974831860004857 / ACTION 0 / REWARD -1 / Q_MAX  0.429906 / Loss  0.0282334238291
loop took 0.31084489822387695 seconds
TIMESTEP 8060 / STATE explore / EPSILON 0.09974828530004858 / ACTION 0 / REWARD 0.1 / Q_MAX  0.48811 / Loss  0.0133173624054
loop took 0.3092677593231201 seconds
TIMESTEP 8061 / STATE explore / EPSILON 0.09974825200004858 / ACTION 0 / REWARD 0.1 / Q_MAX  0.146573 / Loss  0.0143135339022
loop took 0.3107330799102783 seconds
TIMESTEP 8062 / STATE explore / EPSILON 0.09974821870004859 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0461086 / Loss  0.0478448234499
loop took 0.2857644557952881 seconds
TIMESTEP 8063 / STATE explore / EPSILON 0.0997481854000486 / ACTION 0 / REWARD 0.1 / Q_MAX  0.357934 / Loss  0.007077914197
loop took 0.31182265281677246 seconds
TIMESTEP 8064 / STATE explore / EPSILON 0.0997481521000486 / ACTION 0 / REWARD 0.1 / Q_MAX  0.132971 / Loss  0.0142855495214
loop took 0.3028573989868164 seconds
TIMESTEP 8065 / STATE e

TIMESTEP 8109 / STATE explore / EPSILON 0.09974665360004889 / ACTION 0 / REWARD 0.1 / Q_MAX  0.869055 / Loss  0.0156547874212
loop took 0.31534814834594727 seconds
TIMESTEP 8110 / STATE explore / EPSILON 0.0997466203000489 / ACTION 0 / REWARD 0.1 / Q_MAX  0.131855 / Loss  0.00909443479031
----------Random Action----------
loop took 1.0536198616027832 seconds
TIMESTEP 8111 / STATE explore / EPSILON 0.0997465870000489 / ACTION 1 / REWARD -1 / Q_MAX  0.324888 / Loss  0.0133603736758
loop took 0.2675814628601074 seconds
TIMESTEP 8112 / STATE explore / EPSILON 0.09974655370004891 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.563163 / Loss  0.00833672564477
loop took 0.2757911682128906 seconds
TIMESTEP 8113 / STATE explore / EPSILON 0.09974652040004892 / ACTION 0 / REWARD 0.1 / Q_MAX  0.155066 / Loss  0.0127884447575
loop took 0.2845420837402344 seconds
TIMESTEP 8114 / STATE explore / EPSILON 0.09974648710004892 / ACTION 0 / REWARD 0.1 / Q_MAX  0.31115 / Loss  0.00323461205699
loop took 0.275733470916

TIMESTEP 8159 / STATE explore / EPSILON 0.09974498860004921 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0324702 / Loss  0.0123132746667
loop took 0.29979681968688965 seconds
TIMESTEP 8160 / STATE explore / EPSILON 0.09974495530004922 / ACTION 0 / REWARD 0.1 / Q_MAX  0.00877588 / Loss  0.0342933423817
loop took 1.049788236618042 seconds
TIMESTEP 8161 / STATE explore / EPSILON 0.09974492200004922 / ACTION 1 / REWARD 0.1 / Q_MAX  0.254586 / Loss  0.0112242978066
loop took 0.2646200656890869 seconds
TIMESTEP 8162 / STATE explore / EPSILON 0.09974488870004923 / ACTION 0 / REWARD 0.1 / Q_MAX  0.71759 / Loss  0.00882448628545
loop took 0.26570582389831543 seconds
TIMESTEP 8163 / STATE explore / EPSILON 0.09974485540004924 / ACTION 0 / REWARD 0.1 / Q_MAX  0.449815 / Loss  0.00906429812312
loop took 0.2505781650543213 seconds
TIMESTEP 8164 / STATE explore / EPSILON 0.09974482210004924 / ACTION 0 / REWARD -1 / Q_MAX  0.389225 / Loss  0.0149527043104
loop took 0.2627294063568115 seconds
TIMESTEP 8165 / S

TIMESTEP 8209 / STATE explore / EPSILON 0.09974332360004953 / ACTION 0 / REWARD 0.1 / Q_MAX  0.439237 / Loss  0.0123460981995
loop took 0.28676629066467285 seconds
TIMESTEP 8210 / STATE explore / EPSILON 0.09974329030004954 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.734136 / Loss  0.0257714651525
----------Random Action----------
loop took 0.31383562088012695 seconds
TIMESTEP 8211 / STATE explore / EPSILON 0.09974325700004955 / ACTION 0 / REWARD 0.1 / Q_MAX  0.396254 / Loss  0.0201938468963
loop took 0.2998087406158447 seconds
TIMESTEP 8212 / STATE explore / EPSILON 0.09974322370004955 / ACTION 0 / REWARD 0.1 / Q_MAX  0.403034 / Loss  0.0121061038226
loop took 0.3129861354827881 seconds
TIMESTEP 8213 / STATE explore / EPSILON 0.09974319040004956 / ACTION 0 / REWARD -1 / Q_MAX  0.500118 / Loss  0.022393738851
loop took 0.31182312965393066 seconds
TIMESTEP 8214 / STATE explore / EPSILON 0.09974315710004956 / ACTION 0 / REWARD 0.1 / Q_MAX  0.394907 / Loss  0.0252367313951
loop took 0.30587625503

TIMESTEP 8259 / STATE explore / EPSILON 0.09974165860004985 / ACTION 0 / REWARD 0.1 / Q_MAX  0.35522 / Loss  0.0250284820795
loop took 0.31383442878723145 seconds
TIMESTEP 8260 / STATE explore / EPSILON 0.09974162530004986 / ACTION 0 / REWARD 0.1 / Q_MAX  0.365461 / Loss  0.0136398430914
loop took 0.33096885681152344 seconds
TIMESTEP 8261 / STATE explore / EPSILON 0.09974159200004987 / ACTION 0 / REWARD 0.1 / Q_MAX  0.380829 / Loss  0.0246571525931
loop took 0.29891300201416016 seconds
TIMESTEP 8262 / STATE explore / EPSILON 0.09974155870004987 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.66391 / Loss  0.0292442347854
loop took 0.31183791160583496 seconds
TIMESTEP 8263 / STATE explore / EPSILON 0.09974152540004988 / ACTION 0 / REWARD 0.1 / Q_MAX  0.394361 / Loss  0.053155913949
loop took 0.30480146408081055 seconds
TIMESTEP 8264 / STATE explore / EPSILON 0.09974149210004989 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.462945 / Loss  0.199772164226
loop took 0.29982900619506836 seconds
TIMESTEP 8265 / ST

TIMESTEP 8310 / STATE explore / EPSILON 0.09973996030005018 / ACTION 0 / REWARD 0.1 / Q_MAX  0.535298 / Loss  0.00905492901802
loop took 1.0348496437072754 seconds
TIMESTEP 8311 / STATE explore / EPSILON 0.09973992700005019 / ACTION 1 / REWARD 0.1 / Q_MAX  0.482062 / Loss  0.0313016474247
loop took 0.26871347427368164 seconds
TIMESTEP 8312 / STATE explore / EPSILON 0.0997398937000502 / ACTION 0 / REWARD 0.1 / Q_MAX  0.314502 / Loss  0.0191799290478
loop took 0.2667269706726074 seconds
TIMESTEP 8313 / STATE explore / EPSILON 0.0997398604000502 / ACTION 0 / REWARD 0.1 / Q_MAX  0.446077 / Loss  0.0202319230884
loop took 0.24887871742248535 seconds
TIMESTEP 8314 / STATE explore / EPSILON 0.09973982710005021 / ACTION 0 / REWARD -1 / Q_MAX  -0.145333 / Loss  0.0136140715331
loop took 0.2777824401855469 seconds
TIMESTEP 8315 / STATE explore / EPSILON 0.09973979380005021 / ACTION 0 / REWARD 0.1 / Q_MAX  0.806413 / Loss  0.0152214784175
loop took 0.28324222564697266 seconds
TIMESTEP 8316 / STAT

TIMESTEP 8360 / STATE explore / EPSILON 0.0997382953000505 / ACTION 0 / REWARD 0.1 / Q_MAX  0.462399 / Loss  0.0393819659948
loop took 0.32686495780944824 seconds
TIMESTEP 8361 / STATE explore / EPSILON 0.09973826200005051 / ACTION 0 / REWARD 0.1 / Q_MAX  0.514497 / Loss  0.00795832276344
loop took 0.2967865467071533 seconds
TIMESTEP 8362 / STATE explore / EPSILON 0.09973822870005052 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.811416 / Loss  0.0273315962404
loop took 0.28676486015319824 seconds
TIMESTEP 8363 / STATE explore / EPSILON 0.09973819540005052 / ACTION 0 / REWARD 0.1 / Q_MAX  0.277572 / Loss  0.0211531668901
loop took 0.31986045837402344 seconds
TIMESTEP 8364 / STATE explore / EPSILON 0.09973816210005053 / ACTION 0 / REWARD 0.1 / Q_MAX  0.280118 / Loss  0.0407020561397
loop took 0.2907538414001465 seconds
TIMESTEP 8365 / STATE explore / EPSILON 0.09973812880005054 / ACTION 0 / REWARD 0.1 / Q_MAX  0.62699 / Loss  0.0126700159162
loop took 0.29484081268310547 seconds
TIMESTEP 8366 / ST

TIMESTEP 8410 / STATE explore / EPSILON 0.09973663030005082 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.886426 / Loss  0.0178650747985
loop took 1.0354008674621582 seconds
TIMESTEP 8411 / STATE explore / EPSILON 0.09973659700005083 / ACTION 1 / REWARD 0.1 / Q_MAX  0.130801 / Loss  0.0492285527289
loop took 0.26871490478515625 seconds
TIMESTEP 8412 / STATE explore / EPSILON 0.09973656370005084 / ACTION 0 / REWARD 0.1 / Q_MAX  0.278802 / Loss  0.0112436674535
loop took 0.29276514053344727 seconds
TIMESTEP 8413 / STATE explore / EPSILON 0.09973653040005084 / ACTION 0 / REWARD 0.1 / Q_MAX  0.732723 / Loss  0.0268873944879
loop took 0.2727222442626953 seconds
TIMESTEP 8414 / STATE explore / EPSILON 0.09973649710005085 / ACTION 0 / REWARD 0.1 / Q_MAX  0.285681 / Loss  0.0175870563835
loop took 0.2677175998687744 seconds
TIMESTEP 8415 / STATE explore / EPSILON 0.09973646380005086 / ACTION 0 / REWARD 0.1 / Q_MAX  0.578431 / Loss  0.0188684668392
loop took 0.3018007278442383 seconds
TIMESTEP 8416 / STA

TIMESTEP 8461 / STATE explore / EPSILON 0.09973493200005115 / ACTION 0 / REWARD 0.1 / Q_MAX  0.207859 / Loss  0.0140568353236
loop took 0.3052992820739746 seconds
TIMESTEP 8462 / STATE explore / EPSILON 0.09973489870005116 / ACTION 0 / REWARD 0.1 / Q_MAX  0.179472 / Loss  0.00770907662809
loop took 0.2941551208496094 seconds
TIMESTEP 8463 / STATE explore / EPSILON 0.09973486540005116 / ACTION 0 / REWARD -1 / Q_MAX  0.162719 / Loss  0.0487882532179
loop took 0.3038055896759033 seconds
TIMESTEP 8464 / STATE explore / EPSILON 0.09973483210005117 / ACTION 0 / REWARD 0.1 / Q_MAX  0.180869 / Loss  0.0126533061266
loop took 0.29482007026672363 seconds
TIMESTEP 8465 / STATE explore / EPSILON 0.09973479880005118 / ACTION 0 / REWARD 0.1 / Q_MAX  0.202353 / Loss  0.0169040970504
loop took 0.28957056999206543 seconds
TIMESTEP 8466 / STATE explore / EPSILON 0.09973476550005118 / ACTION 0 / REWARD 0.1 / Q_MAX  0.426883 / Loss  0.0138217313215
loop took 0.28375983238220215 seconds
TIMESTEP 8467 / STA

TIMESTEP 8512 / STATE explore / EPSILON 0.09973323370005148 / ACTION 0 / REWARD 0.1 / Q_MAX  0.641767 / Loss  0.00798808876425
loop took 0.29678845405578613 seconds
TIMESTEP 8513 / STATE explore / EPSILON 0.09973320040005149 / ACTION 0 / REWARD -1 / Q_MAX  0.179207 / Loss  0.0274755507708
loop took 0.2827470302581787 seconds
TIMESTEP 8514 / STATE explore / EPSILON 0.09973316710005149 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.10208 / Loss  0.026534628123
loop took 0.32587289810180664 seconds
TIMESTEP 8515 / STATE explore / EPSILON 0.0997331338000515 / ACTION 0 / REWARD 0.1 / Q_MAX  0.543465 / Loss  0.0161804202944
loop took 0.2727224826812744 seconds
TIMESTEP 8516 / STATE explore / EPSILON 0.0997331005000515 / ACTION 0 / REWARD 0.1 / Q_MAX  0.447843 / Loss  0.0360221751034
loop took 0.32386255264282227 seconds
TIMESTEP 8517 / STATE explore / EPSILON 0.09973306720005151 / ACTION 0 / REWARD 0.1 / Q_MAX  0.424272 / Loss  0.0226971283555
loop took 0.3079700469970703 seconds
TIMESTEP 8518 / STATE 

TIMESTEP 8563 / STATE explore / EPSILON 0.09973153540005181 / ACTION 0 / REWARD 0.1 / Q_MAX  0.304891 / Loss  0.017954532057
loop took 0.3519599437713623 seconds
TIMESTEP 8564 / STATE explore / EPSILON 0.09973150210005181 / ACTION 0 / REWARD 0.1 / Q_MAX  0.619678 / Loss  0.108409374952
loop took 0.43367433547973633 seconds
TIMESTEP 8565 / STATE explore / EPSILON 0.09973146880005182 / ACTION 0 / REWARD 0.1 / Q_MAX  0.444208 / Loss  0.020426871255
loop took 0.3960530757904053 seconds
TIMESTEP 8566 / STATE explore / EPSILON 0.09973143550005183 / ACTION 0 / REWARD 0.1 / Q_MAX  0.273516 / Loss  0.0122495051473
loop took 0.4299163818359375 seconds
TIMESTEP 8567 / STATE explore / EPSILON 0.09973140220005183 / ACTION 0 / REWARD 0.1 / Q_MAX  0.482502 / Loss  0.0172619335353
loop took 0.37398505210876465 seconds
TIMESTEP 8568 / STATE explore / EPSILON 0.09973136890005184 / ACTION 0 / REWARD -1 / Q_MAX  0.862359 / Loss  0.00923973601311
loop took 0.3820159435272217 seconds
TIMESTEP 8569 / STATE e

TIMESTEP 8614 / STATE explore / EPSILON 0.09972983710005213 / ACTION 0 / REWARD 0.1 / Q_MAX  0.253539 / Loss  0.0140193197876
loop took 0.3494083881378174 seconds
TIMESTEP 8615 / STATE explore / EPSILON 0.09972980380005214 / ACTION 0 / REWARD -1 / Q_MAX  -0.700314 / Loss  0.0120330005884
loop took 0.3669764995574951 seconds
TIMESTEP 8616 / STATE explore / EPSILON 0.09972977050005215 / ACTION 0 / REWARD 0.1 / Q_MAX  0.384167 / Loss  0.0118712075055
loop took 0.3679792881011963 seconds
TIMESTEP 8617 / STATE explore / EPSILON 0.09972973720005215 / ACTION 0 / REWARD 0.1 / Q_MAX  0.466935 / Loss  0.00843791943043
loop took 0.36496996879577637 seconds
TIMESTEP 8618 / STATE explore / EPSILON 0.09972970390005216 / ACTION 0 / REWARD 0.1 / Q_MAX  0.100834 / Loss  0.0134585453197
loop took 0.3820161819458008 seconds
TIMESTEP 8619 / STATE explore / EPSILON 0.09972967060005217 / ACTION 0 / REWARD 0.1 / Q_MAX  0.100383 / Loss  0.0195789672434
loop took 0.36597371101379395 seconds
TIMESTEP 8620 / STA

TIMESTEP 8665 / STATE explore / EPSILON 0.09972813880005246 / ACTION 0 / REWARD 0.1 / Q_MAX  0.237978 / Loss  0.0161675922573
loop took 0.3215928077697754 seconds
TIMESTEP 8666 / STATE explore / EPSILON 0.09972810550005247 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.18558 / Loss  0.0119617823511
loop took 0.30686068534851074 seconds
TIMESTEP 8667 / STATE explore / EPSILON 0.09972807220005248 / ACTION 0 / REWARD 0.1 / Q_MAX  0.191453 / Loss  0.0206318553537
loop took 0.2547645568847656 seconds
TIMESTEP 8668 / STATE explore / EPSILON 0.09972803890005248 / ACTION 0 / REWARD -1 / Q_MAX  0.174921 / Loss  0.0120855271816
loop took 0.32285547256469727 seconds
TIMESTEP 8669 / STATE explore / EPSILON 0.09972800560005249 / ACTION 0 / REWARD 0.1 / Q_MAX  0.223929 / Loss  0.0281210578978
loop took 0.2770376205444336 seconds
TIMESTEP 8670 / STATE explore / EPSILON 0.0997279723000525 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.208149 / Loss  0.0230233315378
loop took 0.2827587127685547 seconds
TIMESTEP 8671 / STATE

TIMESTEP 8715 / STATE explore / EPSILON 0.09972647380005278 / ACTION 0 / REWARD 0.1 / Q_MAX  0.175807 / Loss  0.0148455277085
loop took 0.2847557067871094 seconds
TIMESTEP 8716 / STATE explore / EPSILON 0.09972644050005279 / ACTION 0 / REWARD 0.1 / Q_MAX  0.542387 / Loss  0.0134788043797
loop took 0.30960679054260254 seconds
TIMESTEP 8717 / STATE explore / EPSILON 0.0997264072000528 / ACTION 0 / REWARD 0.1 / Q_MAX  0.33261 / Loss  0.0294806864113
loop took 0.30167055130004883 seconds
TIMESTEP 8718 / STATE explore / EPSILON 0.0997263739000528 / ACTION 0 / REWARD 0.1 / Q_MAX  0.226305 / Loss  0.0225418880582
loop took 0.2974092960357666 seconds
TIMESTEP 8719 / STATE explore / EPSILON 0.09972634060005281 / ACTION 0 / REWARD 0.1 / Q_MAX  0.291616 / Loss  0.0120643954724
loop took 0.28579020500183105 seconds
TIMESTEP 8720 / STATE explore / EPSILON 0.09972630730005282 / ACTION 0 / REWARD -1 / Q_MAX  -0.879927 / Loss  0.0144073609263
loop took 0.3138241767883301 seconds
TIMESTEP 8721 / STATE 

TIMESTEP 8765 / STATE explore / EPSILON 0.0997248088000531 / ACTION 0 / REWARD 0.1 / Q_MAX  0.370259 / Loss  0.00968916434795
loop took 0.28375673294067383 seconds
TIMESTEP 8766 / STATE explore / EPSILON 0.09972477550005311 / ACTION 0 / REWARD 0.1 / Q_MAX  0.32257 / Loss  0.00918310601264
loop took 0.28275442123413086 seconds
TIMESTEP 8767 / STATE explore / EPSILON 0.09972474220005312 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.147417 / Loss  0.0132300266996
loop took 0.3078432083129883 seconds
TIMESTEP 8768 / STATE explore / EPSILON 0.09972470890005312 / ACTION 0 / REWARD 0.1 / Q_MAX  0.397 / Loss  0.0217171218246
loop took 0.29178714752197266 seconds
TIMESTEP 8769 / STATE explore / EPSILON 0.09972467560005313 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0434097 / Loss  0.0200851336122
loop took 0.28275442123413086 seconds
TIMESTEP 8770 / STATE explore / EPSILON 0.09972464230005314 / ACTION 0 / REWARD 0.1 / Q_MAX  0.603133 / Loss  0.0164483282715
loop took 0.3188455104827881 seconds
TIMESTEP 8771 / STA

TIMESTEP 8816 / STATE explore / EPSILON 0.09972311050005343 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0193784 / Loss  0.0230386462063
loop took 0.29987192153930664 seconds
TIMESTEP 8817 / STATE explore / EPSILON 0.09972307720005344 / ACTION 0 / REWARD 0.1 / Q_MAX  0.176107 / Loss  0.0227294750512
loop took 0.28876519203186035 seconds
TIMESTEP 8818 / STATE explore / EPSILON 0.09972304390005345 / ACTION 0 / REWARD 0.1 / Q_MAX  0.432309 / Loss  0.0158438496292
loop took 0.29255127906799316 seconds
TIMESTEP 8819 / STATE explore / EPSILON 0.09972301060005345 / ACTION 0 / REWARD 0.1 / Q_MAX  0.529212 / Loss  0.0278035178781
loop took 0.3141024112701416 seconds
TIMESTEP 8820 / STATE explore / EPSILON 0.09972297730005346 / ACTION 0 / REWARD 0.1 / Q_MAX  0.512826 / Loss  0.0205318406224
loop took 1.022339105606079 seconds
TIMESTEP 8821 / STATE explore / EPSILON 0.09972294400005347 / ACTION 1 / REWARD 0.1 / Q_MAX  0.634139 / Loss  0.0157122835517
loop took 0.28465700149536133 seconds
TIMESTEP 8822 / ST

TIMESTEP 8867 / STATE explore / EPSILON 0.09972141220005376 / ACTION 0 / REWARD 0.1 / Q_MAX  0.136416 / Loss  0.0350385755301
loop took 0.26671457290649414 seconds
TIMESTEP 8868 / STATE explore / EPSILON 0.09972137890005377 / ACTION 0 / REWARD 0.1 / Q_MAX  0.357729 / Loss  0.00766945211217
loop took 0.2992258071899414 seconds
TIMESTEP 8869 / STATE explore / EPSILON 0.09972134560005377 / ACTION 0 / REWARD -1 / Q_MAX  0.129228 / Loss  0.0173489358276
loop took 0.29447102546691895 seconds
TIMESTEP 8870 / STATE explore / EPSILON 0.09972131230005378 / ACTION 0 / REWARD 0.1 / Q_MAX  0.462139 / Loss  0.00501799304038
loop took 1.0029582977294922 seconds
TIMESTEP 8871 / STATE explore / EPSILON 0.09972127900005379 / ACTION 1 / REWARD 0.1 / Q_MAX  0.328815 / Loss  0.052998829633
loop took 0.26770734786987305 seconds
TIMESTEP 8872 / STATE explore / EPSILON 0.09972124570005379 / ACTION 0 / REWARD 0.1 / Q_MAX  0.343578 / Loss  0.0165182240307
loop took 0.28075456619262695 seconds
TIMESTEP 8873 / ST

TIMESTEP 8918 / STATE explore / EPSILON 0.09971971390005409 / ACTION 0 / REWARD 0.1 / Q_MAX  0.569931 / Loss  0.00792790949345
loop took 0.2807464599609375 seconds
TIMESTEP 8919 / STATE explore / EPSILON 0.0997196806000541 / ACTION 0 / REWARD 0.1 / Q_MAX  0.258474 / Loss  0.0212915204465
loop took 0.3870260715484619 seconds
TIMESTEP 8920 / STATE explore / EPSILON 0.0997196473000541 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0739399 / Loss  0.0154464691877
loop took 1.1065855026245117 seconds
TIMESTEP 8921 / STATE explore / EPSILON 0.09971961400005411 / ACTION 1 / REWARD 0.1 / Q_MAX  0.279186 / Loss  0.0235136002302
loop took 0.28576040267944336 seconds
TIMESTEP 8922 / STATE explore / EPSILON 0.09971958070005411 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0872729 / Loss  0.010867821984
loop took 0.2798638343811035 seconds
TIMESTEP 8923 / STATE explore / EPSILON 0.09971954740005412 / ACTION 0 / REWARD 0.1 / Q_MAX  0.291966 / Loss  0.00785025022924
loop took 0.3119077682495117 seconds
TIMESTEP 8924 / ST

TIMESTEP 8968 / STATE explore / EPSILON 0.09971804890005441 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.826725 / Loss  0.0133152659982
loop took 0.31224870681762695 seconds
TIMESTEP 8969 / STATE explore / EPSILON 0.09971801560005442 / ACTION 0 / REWARD 0.1 / Q_MAX  0.249594 / Loss  0.0167573485523
loop took 0.2853672504425049 seconds
TIMESTEP 8970 / STATE explore / EPSILON 0.09971798230005442 / ACTION 0 / REWARD -1 / Q_MAX  0.596224 / Loss  0.00448614358902
loop took 1.0500140190124512 seconds
TIMESTEP 8971 / STATE explore / EPSILON 0.09971794900005443 / ACTION 1 / REWARD 0.1 / Q_MAX  0.316211 / Loss  0.023271003738
loop took 0.26651573181152344 seconds
TIMESTEP 8972 / STATE explore / EPSILON 0.09971791570005444 / ACTION 0 / REWARD 0.1 / Q_MAX  0.236477 / Loss  0.018571825698
loop took 0.2917792797088623 seconds
TIMESTEP 8973 / STATE explore / EPSILON 0.09971788240005444 / ACTION 0 / REWARD 0.1 / Q_MAX  0.014744 / Loss  0.0301995649934
loop took 0.27152514457702637 seconds
TIMESTEP 8974 / STAT

TIMESTEP 9019 / STATE explore / EPSILON 0.09971635060005474 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0360413 / Loss  0.0173095837235
loop took 0.286851167678833 seconds
TIMESTEP 9020 / STATE explore / EPSILON 0.09971631730005474 / ACTION 0 / REWARD 0.1 / Q_MAX  0.345536 / Loss  0.00687076570466
loop took 0.29578685760498047 seconds
TIMESTEP 9021 / STATE explore / EPSILON 0.09971628400005475 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.674473 / Loss  0.0178718231618
loop took 0.2837560176849365 seconds
TIMESTEP 9022 / STATE explore / EPSILON 0.09971625070005476 / ACTION 0 / REWARD 0.1 / Q_MAX  0.393887 / Loss  0.0146409571171
loop took 0.28275132179260254 seconds
TIMESTEP 9023 / STATE explore / EPSILON 0.09971621740005476 / ACTION 0 / REWARD 0.1 / Q_MAX  0.278001 / Loss  0.0167158842087
loop took 0.2947864532470703 seconds
TIMESTEP 9024 / STATE explore / EPSILON 0.09971618410005477 / ACTION 0 / REWARD 0.1 / Q_MAX  0.259815 / Loss  0.00645139813423
loop took 0.2847578525543213 seconds
TIMESTEP 9025 / S

TIMESTEP 9070 / STATE explore / EPSILON 0.09971465230005506 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.268795 / Loss  0.0268075931817
loop took 0.2857682704925537 seconds
TIMESTEP 9071 / STATE explore / EPSILON 0.09971461900005507 / ACTION 0 / REWARD 0.1 / Q_MAX  0.390914 / Loss  0.00922844652086
loop took 0.2947843074798584 seconds
TIMESTEP 9072 / STATE explore / EPSILON 0.09971458570005508 / ACTION 0 / REWARD 0.1 / Q_MAX  0.431304 / Loss  0.0115384720266
loop took 0.2968423366546631 seconds
TIMESTEP 9073 / STATE explore / EPSILON 0.09971455240005508 / ACTION 0 / REWARD 0.1 / Q_MAX  0.425476 / Loss  0.030350625515
loop took 0.31174206733703613 seconds
TIMESTEP 9074 / STATE explore / EPSILON 0.09971451910005509 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.801787 / Loss  0.0171115063131
loop took 0.2777256965637207 seconds
TIMESTEP 9075 / STATE explore / EPSILON 0.0997144858000551 / ACTION 0 / REWARD 0.1 / Q_MAX  0.511847 / Loss  0.0134966559708
loop took 0.307828426361084 seconds
TIMESTEP 9076 / STATE

loop took 1.03007173538208 seconds
TIMESTEP 9121 / STATE explore / EPSILON 0.09971295400005539 / ACTION 1 / REWARD -1 / Q_MAX  0.314403 / Loss  0.0102559812367
loop took 0.2697162628173828 seconds
TIMESTEP 9122 / STATE explore / EPSILON 0.0997129207000554 / ACTION 0 / REWARD 0.1 / Q_MAX  0.455018 / Loss  0.0547295585275
loop took 0.2807629108428955 seconds
TIMESTEP 9123 / STATE explore / EPSILON 0.0997128874000554 / ACTION 0 / REWARD 0.1 / Q_MAX  1.71592 / Loss  0.0434389635921
loop took 0.2977898120880127 seconds
TIMESTEP 9124 / STATE explore / EPSILON 0.09971285410005541 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0981941 / Loss  0.0163847673684
loop took 0.2847599983215332 seconds
TIMESTEP 9125 / STATE explore / EPSILON 0.09971282080005542 / ACTION 0 / REWARD 0.1 / Q_MAX  0.356362 / Loss  0.0282164923847
loop took 0.26770615577697754 seconds
TIMESTEP 9126 / STATE explore / EPSILON 0.09971278750005542 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.07524 / Loss  0.0243695732206
loop took 0.2656950950622

loop took 1.0199685096740723 seconds
TIMESTEP 9171 / STATE explore / EPSILON 0.09971128900005571 / ACTION 1 / REWARD 0.1 / Q_MAX  0.226197 / Loss  0.0194616969675
loop took 0.2745683193206787 seconds
TIMESTEP 9172 / STATE explore / EPSILON 0.09971125570005572 / ACTION 0 / REWARD 0.1 / Q_MAX  0.234901 / Loss  0.0185649581254
loop took 0.2667093276977539 seconds
TIMESTEP 9173 / STATE explore / EPSILON 0.09971122240005573 / ACTION 0 / REWARD -1 / Q_MAX  0.311791 / Loss  0.0697242319584
loop took 0.2657144069671631 seconds
TIMESTEP 9174 / STATE explore / EPSILON 0.09971118910005573 / ACTION 0 / REWARD 0.1 / Q_MAX  0.203037 / Loss  0.0221709981561
loop took 0.3098897933959961 seconds
TIMESTEP 9175 / STATE explore / EPSILON 0.09971115580005574 / ACTION 0 / REWARD 0.1 / Q_MAX  0.285682 / Loss  0.0295363366604
loop took 0.3008456230163574 seconds
TIMESTEP 9176 / STATE explore / EPSILON 0.09971112250005575 / ACTION 0 / REWARD 0.1 / Q_MAX  0.484221 / Loss  0.0151858143508
loop took 0.27071142196

TIMESTEP 9221 / STATE explore / EPSILON 0.09970962400005604 / ACTION 0 / REWARD 0.1 / Q_MAX  0.328876 / Loss  0.0216002706438
loop took 0.319810152053833 seconds
TIMESTEP 9222 / STATE explore / EPSILON 0.09970959070005604 / ACTION 0 / REWARD 0.1 / Q_MAX  0.218155 / Loss  0.0161683149636
loop took 0.3058438301086426 seconds
TIMESTEP 9223 / STATE explore / EPSILON 0.09970955740005605 / ACTION 0 / REWARD 0.1 / Q_MAX  0.448798 / Loss  0.00684809079394
loop took 0.3092620372772217 seconds
TIMESTEP 9224 / STATE explore / EPSILON 0.09970952410005605 / ACTION 0 / REWARD -1 / Q_MAX  0.385344 / Loss  0.0164141058922
loop took 0.280794620513916 seconds
TIMESTEP 9225 / STATE explore / EPSILON 0.09970949080005606 / ACTION 0 / REWARD 0.1 / Q_MAX  1.05998 / Loss  0.0148482099175
loop took 0.3005537986755371 seconds
TIMESTEP 9226 / STATE explore / EPSILON 0.09970945750005607 / ACTION 0 / REWARD 0.1 / Q_MAX  0.244523 / Loss  0.0100756473839
loop took 0.26943159103393555 seconds
TIMESTEP 9227 / STATE ex

loop took 1.0286476612091064 seconds
TIMESTEP 9271 / STATE explore / EPSILON 0.09970795900005636 / ACTION 1 / REWARD 0.1 / Q_MAX  0.288423 / Loss  0.0128803784028
loop took 0.27358198165893555 seconds
TIMESTEP 9272 / STATE explore / EPSILON 0.09970792570005636 / ACTION 0 / REWARD 0.1 / Q_MAX  0.289285 / Loss  0.00447275303304
loop took 0.26674866676330566 seconds
TIMESTEP 9273 / STATE explore / EPSILON 0.09970789240005637 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292662 / Loss  0.00663690082729
loop took 0.27574682235717773 seconds
TIMESTEP 9274 / STATE explore / EPSILON 0.09970785910005638 / ACTION 0 / REWARD -1 / Q_MAX  0.435262 / Loss  0.00417852727696
loop took 0.2838129997253418 seconds
TIMESTEP 9275 / STATE explore / EPSILON 0.09970782580005638 / ACTION 0 / REWARD 0.1 / Q_MAX  0.300805 / Loss  0.0172450989485
loop took 0.2873268127441406 seconds
TIMESTEP 9276 / STATE explore / EPSILON 0.09970779250005639 / ACTION 0 / REWARD 0.1 / Q_MAX  0.298837 / Loss  0.0158899184316
loop took 0.28375

TIMESTEP 9321 / STATE explore / EPSILON 0.09970629400005668 / ACTION 0 / REWARD 0.1 / Q_MAX  0.297181 / Loss  0.0046074022539
loop took 0.3198051452636719 seconds
TIMESTEP 9322 / STATE explore / EPSILON 0.09970626070005668 / ACTION 0 / REWARD 0.1 / Q_MAX  0.138813 / Loss  0.0097744865343
loop took 0.2967996597290039 seconds
TIMESTEP 9323 / STATE explore / EPSILON 0.09970622740005669 / ACTION 0 / REWARD 0.1 / Q_MAX  0.134891 / Loss  0.00918056163937
loop took 0.28380274772644043 seconds
TIMESTEP 9324 / STATE explore / EPSILON 0.0997061941000567 / ACTION 0 / REWARD 0.1 / Q_MAX  0.411493 / Loss  0.0121069811285
loop took 0.26754069328308105 seconds
TIMESTEP 9325 / STATE explore / EPSILON 0.0997061608000567 / ACTION 0 / REWARD -1 / Q_MAX  0.345318 / Loss  0.0115488413721
loop took 0.28150320053100586 seconds
TIMESTEP 9326 / STATE explore / EPSILON 0.09970612750005671 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.108616 / Loss  0.0086262896657
loop took 0.267711877822876 seconds
TIMESTEP 9327 / STATE

TIMESTEP 9371 / STATE explore / EPSILON 0.099704629000057 / ACTION 0 / REWARD 0.1 / Q_MAX  0.276086 / Loss  0.00859886966646
loop took 0.28354763984680176 seconds
TIMESTEP 9372 / STATE explore / EPSILON 0.099704595700057 / ACTION 0 / REWARD 0.1 / Q_MAX  0.610667 / Loss  0.0225377287716
loop took 0.29602575302124023 seconds
TIMESTEP 9373 / STATE explore / EPSILON 0.09970456240005701 / ACTION 0 / REWARD 0.1 / Q_MAX  0.268778 / Loss  0.00600016955286
loop took 0.30185747146606445 seconds
TIMESTEP 9374 / STATE explore / EPSILON 0.09970452910005702 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0314347 / Loss  0.0341732986271
loop took 0.2958369255065918 seconds
TIMESTEP 9375 / STATE explore / EPSILON 0.09970449580005702 / ACTION 0 / REWARD 0.1 / Q_MAX  0.484708 / Loss  0.0204083751887
loop took 0.2687065601348877 seconds
TIMESTEP 9376 / STATE explore / EPSILON 0.09970446250005703 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0355099 / Loss  0.0114829503
loop took 0.286243200302124 seconds
TIMESTEP 9377 / STATE

TIMESTEP 9421 / STATE explore / EPSILON 0.09970296400005732 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.107989 / Loss  0.0336986519396
loop took 0.2982914447784424 seconds
TIMESTEP 9422 / STATE explore / EPSILON 0.09970293070005733 / ACTION 0 / REWARD 0.1 / Q_MAX  0.341208 / Loss  0.00566069455817
loop took 0.2671637535095215 seconds
TIMESTEP 9423 / STATE explore / EPSILON 0.09970289740005733 / ACTION 0 / REWARD -1 / Q_MAX  0.0867654 / Loss  0.00747128808871
loop took 0.29981160163879395 seconds
TIMESTEP 9424 / STATE explore / EPSILON 0.09970286410005734 / ACTION 0 / REWARD 0.1 / Q_MAX  0.420558 / Loss  0.00774867506698
loop took 0.3048107624053955 seconds
TIMESTEP 9425 / STATE explore / EPSILON 0.09970283080005735 / ACTION 0 / REWARD 0.1 / Q_MAX  0.227502 / Loss  0.0118892053142
loop took 0.2877655029296875 seconds
TIMESTEP 9426 / STATE explore / EPSILON 0.09970279750005735 / ACTION 0 / REWARD 0.1 / Q_MAX  0.448347 / Loss  0.0140030831099
loop took 0.30097270011901855 seconds
TIMESTEP 9427 / 

TIMESTEP 9472 / STATE explore / EPSILON 0.09970126570005765 / ACTION 0 / REWARD 0.1 / Q_MAX  0.247842 / Loss  0.0214442517608
loop took 0.2966289520263672 seconds
TIMESTEP 9473 / STATE explore / EPSILON 0.09970123240005765 / ACTION 0 / REWARD 0.1 / Q_MAX  0.550753 / Loss  0.00594833493233
loop took 0.28402256965637207 seconds
TIMESTEP 9474 / STATE explore / EPSILON 0.09970119910005766 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.830426 / Loss  0.00394489336759
loop took 0.2944648265838623 seconds
TIMESTEP 9475 / STATE explore / EPSILON 0.09970116580005767 / ACTION 0 / REWARD -1 / Q_MAX  0.280487 / Loss  0.0184025391936
loop took 0.27344202995300293 seconds
TIMESTEP 9476 / STATE explore / EPSILON 0.09970113250005767 / ACTION 0 / REWARD 0.1 / Q_MAX  0.36823 / Loss  0.0108755547553
loop took 0.28293848037719727 seconds
TIMESTEP 9477 / STATE explore / EPSILON 0.09970109920005768 / ACTION 0 / REWARD 0.1 / Q_MAX  0.155968 / Loss  0.00730701908469
loop took 0.3048055171966553 seconds
TIMESTEP 9478 / S

TIMESTEP 9523 / STATE explore / EPSILON 0.09969956740005798 / ACTION 0 / REWARD 0.1 / Q_MAX  0.240587 / Loss  0.0135415848345
loop took 0.28568410873413086 seconds
TIMESTEP 9524 / STATE explore / EPSILON 0.09969953410005798 / ACTION 0 / REWARD -1 / Q_MAX  0.166834 / Loss  0.0165280979127
loop took 0.2686939239501953 seconds
TIMESTEP 9525 / STATE explore / EPSILON 0.09969950080005799 / ACTION 0 / REWARD 0.1 / Q_MAX  0.498011 / Loss  0.0116464551538
loop took 0.2776658535003662 seconds
TIMESTEP 9526 / STATE explore / EPSILON 0.099699467500058 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0507849 / Loss  0.0268061943352
loop took 0.28516507148742676 seconds
TIMESTEP 9527 / STATE explore / EPSILON 0.099699434200058 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.724988 / Loss  0.0342607982457
loop took 0.26871633529663086 seconds
TIMESTEP 9528 / STATE explore / EPSILON 0.09969940090005801 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.183719 / Loss  0.0255150012672
loop took 0.28365349769592285 seconds
TIMESTEP 9529 / STA

TIMESTEP 9573 / STATE explore / EPSILON 0.0996979024000583 / ACTION 0 / REWARD 0.1 / Q_MAX  0.428213 / Loss  0.0154182780534
loop took 0.3098294734954834 seconds
TIMESTEP 9574 / STATE explore / EPSILON 0.0996978691000583 / ACTION 0 / REWARD 0.1 / Q_MAX  0.445342 / Loss  0.0369618237019
loop took 0.2858467102050781 seconds
TIMESTEP 9575 / STATE explore / EPSILON 0.09969783580005831 / ACTION 0 / REWARD 0.1 / Q_MAX  0.438947 / Loss  0.00793676357716
loop took 0.3239779472351074 seconds
TIMESTEP 9576 / STATE explore / EPSILON 0.09969780250005832 / ACTION 0 / REWARD -1 / Q_MAX  0.371182 / Loss  0.00476856809109
loop took 0.2899000644683838 seconds
TIMESTEP 9577 / STATE explore / EPSILON 0.09969776920005832 / ACTION 0 / REWARD 0.1 / Q_MAX  0.390976 / Loss  0.011907722801
loop took 0.2878086566925049 seconds
TIMESTEP 9578 / STATE explore / EPSILON 0.09969773590005833 / ACTION 0 / REWARD 0.1 / Q_MAX  0.325999 / Loss  0.025992134586
loop took 0.26679301261901855 seconds
TIMESTEP 9579 / STATE ex

TIMESTEP 9624 / STATE explore / EPSILON 0.09969620410005862 / ACTION 0 / REWARD 0.1 / Q_MAX  0.5937 / Loss  0.00743088126183
loop took 0.30068206787109375 seconds
TIMESTEP 9625 / STATE explore / EPSILON 0.09969617080005863 / ACTION 0 / REWARD 0.1 / Q_MAX  0.372977 / Loss  0.00881906505674
loop took 0.3018965721130371 seconds
TIMESTEP 9626 / STATE explore / EPSILON 0.09969613750005864 / ACTION 0 / REWARD -1 / Q_MAX  0.388532 / Loss  0.0423412211239
loop took 0.280747652053833 seconds
TIMESTEP 9627 / STATE explore / EPSILON 0.09969610420005864 / ACTION 0 / REWARD 0.1 / Q_MAX  0.379761 / Loss  0.0498594939709
loop took 0.30582356452941895 seconds
TIMESTEP 9628 / STATE explore / EPSILON 0.09969607090005865 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.193235 / Loss  0.0141951628029
loop took 0.28975486755371094 seconds
TIMESTEP 9629 / STATE explore / EPSILON 0.09969603760005866 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.934891 / Loss  0.0160470362753
loop took 0.30994248390197754 seconds
TIMESTEP 9630 / ST

TIMESTEP 9675 / STATE explore / EPSILON 0.09969450580005895 / ACTION 0 / REWARD 0.1 / Q_MAX  0.562833 / Loss  0.0156089793891
loop took 0.26770758628845215 seconds
TIMESTEP 9676 / STATE explore / EPSILON 0.09969447250005896 / ACTION 0 / REWARD 0.1 / Q_MAX  0.239275 / Loss  0.00504508428276
loop took 0.26570630073547363 seconds
TIMESTEP 9677 / STATE explore / EPSILON 0.09969443920005897 / ACTION 0 / REWARD 0.1 / Q_MAX  0.370066 / Loss  0.0103265969083
loop took 0.283754825592041 seconds
TIMESTEP 9678 / STATE explore / EPSILON 0.09969440590005897 / ACTION 0 / REWARD 0.1 / Q_MAX  0.450818 / Loss  0.0103162620217
loop took 0.2649819850921631 seconds
TIMESTEP 9679 / STATE explore / EPSILON 0.09969437260005898 / ACTION 0 / REWARD 0.1 / Q_MAX  0.254337 / Loss  0.0047884886153
loop took 0.27881479263305664 seconds
TIMESTEP 9680 / STATE explore / EPSILON 0.09969433930005898 / ACTION 0 / REWARD 0.1 / Q_MAX  0.437686 / Loss  0.0127001106739
loop took 1.0183424949645996 seconds
TIMESTEP 9681 / STA

TIMESTEP 9725 / STATE explore / EPSILON 0.09969284080005927 / ACTION 0 / REWARD 0.1 / Q_MAX  0.369935 / Loss  0.0102101201192
loop took 0.2503647804260254 seconds
TIMESTEP 9726 / STATE explore / EPSILON 0.09969280750005928 / ACTION 0 / REWARD -1 / Q_MAX  0.484161 / Loss  0.00743476301432
loop took 0.2927823066711426 seconds
TIMESTEP 9727 / STATE explore / EPSILON 0.09969277420005929 / ACTION 0 / REWARD 0.1 / Q_MAX  0.279659 / Loss  0.0203623399138
loop took 0.2724952697753906 seconds
TIMESTEP 9728 / STATE explore / EPSILON 0.09969274090005929 / ACTION 0 / REWARD 0.1 / Q_MAX  0.543523 / Loss  0.00849867798388
loop took 0.29790282249450684 seconds
TIMESTEP 9729 / STATE explore / EPSILON 0.0996927076000593 / ACTION 0 / REWARD 0.1 / Q_MAX  0.368772 / Loss  0.00911790505052
loop took 0.2847304344177246 seconds
TIMESTEP 9730 / STATE explore / EPSILON 0.0996926743000593 / ACTION 0 / REWARD 0.1 / Q_MAX  0.450207 / Loss  0.00731707829982
loop took 0.2667055130004883 seconds
TIMESTEP 9731 / STAT

TIMESTEP 9775 / STATE explore / EPSILON 0.0996911758000596 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0607123 / Loss  0.024909902364
loop took 0.2708415985107422 seconds
TIMESTEP 9776 / STATE explore / EPSILON 0.0996911425000596 / ACTION 0 / REWARD 0.1 / Q_MAX  0.166377 / Loss  0.00313074979931
loop took 0.2816650867462158 seconds
TIMESTEP 9777 / STATE explore / EPSILON 0.09969110920005961 / ACTION 0 / REWARD -1 / Q_MAX  0.106465 / Loss  0.0112310443074
loop took 0.2697443962097168 seconds
TIMESTEP 9778 / STATE explore / EPSILON 0.09969107590005961 / ACTION 0 / REWARD 0.1 / Q_MAX  0.100408 / Loss  0.0116476472467
loop took 0.2657194137573242 seconds
TIMESTEP 9779 / STATE explore / EPSILON 0.09969104260005962 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0446023 / Loss  0.0150665575638
loop took 0.26570653915405273 seconds
TIMESTEP 9780 / STATE explore / EPSILON 0.09969100930005963 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0851977 / Loss  0.0170335136354
loop took 1.0107550621032715 seconds
TIMESTEP 9781 / STA

TIMESTEP 9826 / STATE explore / EPSILON 0.09968947750005992 / ACTION 0 / REWARD 0.1 / Q_MAX  0.367614 / Loss  0.0236211828887
loop took 0.2687194347381592 seconds
TIMESTEP 9827 / STATE explore / EPSILON 0.09968944420005993 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.329047 / Loss  0.0315531045198
loop took 0.2668609619140625 seconds
TIMESTEP 9828 / STATE explore / EPSILON 0.09968941090005994 / ACTION 0 / REWARD 0.1 / Q_MAX  0.226881 / Loss  0.00390364835039
loop took 0.293778657913208 seconds
TIMESTEP 9829 / STATE explore / EPSILON 0.09968937760005994 / ACTION 0 / REWARD 0.1 / Q_MAX  0.408451 / Loss  0.0107158608735
loop took 0.27063441276550293 seconds
TIMESTEP 9830 / STATE explore / EPSILON 0.09968934430005995 / ACTION 0 / REWARD 0.1 / Q_MAX  0.209007 / Loss  0.0110625978559
----------Random Action----------
loop took 0.3156094551086426 seconds
TIMESTEP 9831 / STATE explore / EPSILON 0.09968931100005995 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.237764 / Loss  0.0153495371342
loop took 0.3119974136

TIMESTEP 9876 / STATE explore / EPSILON 0.09968781250006024 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.811433 / Loss  0.0420309342444
loop took 0.2687089443206787 seconds
TIMESTEP 9877 / STATE explore / EPSILON 0.09968777920006025 / ACTION 0 / REWARD 0.1 / Q_MAX  0.109788 / Loss  0.0137565992773
loop took 0.27774882316589355 seconds
TIMESTEP 9878 / STATE explore / EPSILON 0.09968774590006026 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.15785 / Loss  0.010826613754
loop took 0.29782748222351074 seconds
TIMESTEP 9879 / STATE explore / EPSILON 0.09968771260006026 / ACTION 0 / REWARD 0.1 / Q_MAX  1.02114 / Loss  0.00984698720276
loop took 0.28886890411376953 seconds
TIMESTEP 9880 / STATE explore / EPSILON 0.09968767930006027 / ACTION 0 / REWARD -1 / Q_MAX  0.106491 / Loss  0.0134645951912
loop took 0.28073811531066895 seconds
TIMESTEP 9881 / STATE explore / EPSILON 0.09968764600006028 / ACTION 0 / REWARD 0.1 / Q_MAX  0.111262 / Loss  0.0191023219377
loop took 0.26871156692504883 seconds
TIMESTEP 9882 / ST

TIMESTEP 9927 / STATE explore / EPSILON 0.09968611420006057 / ACTION 0 / REWARD -1 / Q_MAX  0.222744 / Loss  0.0190046932548
loop took 0.2677178382873535 seconds
TIMESTEP 9928 / STATE explore / EPSILON 0.09968608090006058 / ACTION 0 / REWARD 0.1 / Q_MAX  0.266667 / Loss  0.0234938021749
loop took 0.26610612869262695 seconds
TIMESTEP 9929 / STATE explore / EPSILON 0.09968604760006058 / ACTION 0 / REWARD 0.1 / Q_MAX  0.112891 / Loss  0.0493150055408
loop took 0.26770997047424316 seconds
TIMESTEP 9930 / STATE explore / EPSILON 0.09968601430006059 / ACTION 0 / REWARD 0.1 / Q_MAX  0.164588 / Loss  0.00790880527347
loop took 0.2817566394805908 seconds
TIMESTEP 9931 / STATE explore / EPSILON 0.0996859810000606 / ACTION 0 / REWARD 0.1 / Q_MAX  0.291949 / Loss  0.0126329082996
loop took 0.2838015556335449 seconds
TIMESTEP 9932 / STATE explore / EPSILON 0.0996859477000606 / ACTION 0 / REWARD 0.1 / Q_MAX  0.138628 / Loss  0.012699758634
loop took 0.29520297050476074 seconds
TIMESTEP 9933 / STATE 

TIMESTEP 9978 / STATE explore / EPSILON 0.0996844159000609 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0398161 / Loss  0.00855072773993
loop took 0.265702486038208 seconds
TIMESTEP 9979 / STATE explore / EPSILON 0.0996843826000609 / ACTION 0 / REWARD 0.1 / Q_MAX  0.406216 / Loss  0.0093489876017
loop took 0.26574039459228516 seconds
TIMESTEP 9980 / STATE explore / EPSILON 0.09968434930006091 / ACTION 0 / REWARD 0.1 / Q_MAX  0.280806 / Loss  0.0154665783048
loop took 1.0323410034179688 seconds
TIMESTEP 9981 / STATE explore / EPSILON 0.09968431600006092 / ACTION 1 / REWARD -1 / Q_MAX  0.43538 / Loss  0.0493570379913
loop took 0.26879096031188965 seconds
TIMESTEP 9982 / STATE explore / EPSILON 0.09968428270006092 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0216957 / Loss  0.0194186475128
loop took 0.2946951389312744 seconds
TIMESTEP 9983 / STATE explore / EPSILON 0.09968424940006093 / ACTION 0 / REWARD 0.1 / Q_MAX  0.352976 / Loss  0.00962347630411
loop took 0.2837533950805664 seconds
TIMESTEP 9984 / STAT

TIMESTEP 10028 / STATE explore / EPSILON 0.09968275090006122 / ACTION 0 / REWARD 0.1 / Q_MAX  0.373846 / Loss  0.00646358169615
loop took 0.29778623580932617 seconds
TIMESTEP 10029 / STATE explore / EPSILON 0.09968271760006123 / ACTION 0 / REWARD 0.1 / Q_MAX  0.176274 / Loss  0.00886757019907
loop took 0.2637054920196533 seconds
TIMESTEP 10030 / STATE explore / EPSILON 0.09968268430006123 / ACTION 0 / REWARD 0.1 / Q_MAX  0.180653 / Loss  0.0117190293968
----------Random Action----------
loop took 0.28552770614624023 seconds
TIMESTEP 10031 / STATE explore / EPSILON 0.09968265100006124 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0302964 / Loss  0.014530910179
loop took 0.27680444717407227 seconds
TIMESTEP 10032 / STATE explore / EPSILON 0.09968261770006125 / ACTION 0 / REWARD 0.1 / Q_MAX  0.344612 / Loss  0.00775713892654
loop took 0.28715944290161133 seconds
TIMESTEP 10033 / STATE explore / EPSILON 0.09968258440006125 / ACTION 0 / REWARD 0.1 / Q_MAX  0.471114 / Loss  0.00620216969401
loop took 

TIMESTEP 10078 / STATE explore / EPSILON 0.09968108590006154 / ACTION 0 / REWARD 0.1 / Q_MAX  0.420688 / Loss  0.0201326254755
loop took 0.30358266830444336 seconds
TIMESTEP 10079 / STATE explore / EPSILON 0.09968105260006155 / ACTION 0 / REWARD -1 / Q_MAX  0.45752 / Loss  0.0547890178859
loop took 0.27672600746154785 seconds
TIMESTEP 10080 / STATE explore / EPSILON 0.09968101930006155 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0818703 / Loss  0.0158569067717
loop took 0.2677135467529297 seconds
TIMESTEP 10081 / STATE explore / EPSILON 0.09968098600006156 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.848594 / Loss  0.0124776083976
loop took 0.2667078971862793 seconds
TIMESTEP 10082 / STATE explore / EPSILON 0.09968095270006157 / ACTION 0 / REWARD 0.1 / Q_MAX  0.483929 / Loss  0.0158107187599
loop took 0.2657456398010254 seconds
TIMESTEP 10083 / STATE explore / EPSILON 0.09968091940006157 / ACTION 0 / REWARD 0.1 / Q_MAX  0.480724 / Loss  0.0110162105411
loop took 0.26470279693603516 seconds
TIMESTEP 1008

TIMESTEP 10128 / STATE explore / EPSILON 0.09967942090006186 / ACTION 0 / REWARD 0.1 / Q_MAX  0.121667 / Loss  0.0275217909366
loop took 0.2837545871734619 seconds
TIMESTEP 10129 / STATE explore / EPSILON 0.09967938760006187 / ACTION 0 / REWARD 0.1 / Q_MAX  0.255674 / Loss  0.0158455744386
loop took 0.2987945079803467 seconds
TIMESTEP 10130 / STATE explore / EPSILON 0.09967935430006188 / ACTION 0 / REWARD 0.1 / Q_MAX  0.214923 / Loss  0.00468087056652
loop took 0.2825028896331787 seconds
TIMESTEP 10131 / STATE explore / EPSILON 0.09967932100006188 / ACTION 0 / REWARD 0.1 / Q_MAX  0.598834 / Loss  0.00818286556751
loop took 0.29578375816345215 seconds
TIMESTEP 10132 / STATE explore / EPSILON 0.09967928770006189 / ACTION 0 / REWARD 0.1 / Q_MAX  0.162592 / Loss  0.0121238939464
loop took 0.29923367500305176 seconds
TIMESTEP 10133 / STATE explore / EPSILON 0.0996792544000619 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.434538 / Loss  0.0143767325208
loop took 0.28702378273010254 seconds
TIMESTEP 10

TIMESTEP 10178 / STATE explore / EPSILON 0.09967775590006218 / ACTION 0 / REWARD 0.1 / Q_MAX  0.516337 / Loss  0.0172529481351
loop took 0.28676486015319824 seconds
TIMESTEP 10179 / STATE explore / EPSILON 0.09967772260006219 / ACTION 0 / REWARD 0.1 / Q_MAX  0.214395 / Loss  0.0170326754451
loop took 0.26266050338745117 seconds
TIMESTEP 10180 / STATE explore / EPSILON 0.0996776893000622 / ACTION 0 / REWARD -1 / Q_MAX  0.213572 / Loss  0.0063038142398
loop took 0.2835361957550049 seconds
TIMESTEP 10181 / STATE explore / EPSILON 0.0996776560000622 / ACTION 0 / REWARD 0.1 / Q_MAX  0.213365 / Loss  0.0181261114776
loop took 0.2927863597869873 seconds
TIMESTEP 10182 / STATE explore / EPSILON 0.09967762270006221 / ACTION 0 / REWARD 0.1 / Q_MAX  0.411145 / Loss  0.00782139413059
loop took 0.2907693386077881 seconds
TIMESTEP 10183 / STATE explore / EPSILON 0.09967758940006222 / ACTION 0 / REWARD 0.1 / Q_MAX  0.350405 / Loss  0.0130943544209
loop took 0.2827479839324951 seconds
TIMESTEP 10184 /

TIMESTEP 10228 / STATE explore / EPSILON 0.0996760909000625 / ACTION 0 / REWARD 0.1 / Q_MAX  0.381563 / Loss  0.0329544767737
loop took 0.2797582149505615 seconds
TIMESTEP 10229 / STATE explore / EPSILON 0.09967605760006251 / ACTION 0 / REWARD 0.1 / Q_MAX  0.530471 / Loss  0.0505511686206
loop took 0.302905797958374 seconds
TIMESTEP 10230 / STATE explore / EPSILON 0.09967602430006252 / ACTION 0 / REWARD 0.1 / Q_MAX  0.266658 / Loss  0.0282765217125
loop took 0.30046725273132324 seconds
TIMESTEP 10231 / STATE explore / EPSILON 0.09967599100006252 / ACTION 0 / REWARD 0.1 / Q_MAX  0.271843 / Loss  0.012837129645
loop took 0.27673888206481934 seconds
TIMESTEP 10232 / STATE explore / EPSILON 0.09967595770006253 / ACTION 0 / REWARD 0.1 / Q_MAX  0.381328 / Loss  0.0466168858111
loop took 0.3008553981781006 seconds
TIMESTEP 10233 / STATE explore / EPSILON 0.09967592440006254 / ACTION 0 / REWARD 0.1 / Q_MAX  0.286635 / Loss  0.0529819838703
loop took 0.27171993255615234 seconds
TIMESTEP 10234 /

TIMESTEP 10278 / STATE explore / EPSILON 0.09967442590006283 / ACTION 0 / REWARD 0.1 / Q_MAX  0.200547 / Loss  0.0081864502281
loop took 0.29578518867492676 seconds
TIMESTEP 10279 / STATE explore / EPSILON 0.09967439260006283 / ACTION 0 / REWARD 0.1 / Q_MAX  0.376967 / Loss  0.00560782663524
loop took 0.3289215564727783 seconds
TIMESTEP 10280 / STATE explore / EPSILON 0.09967435930006284 / ACTION 0 / REWARD 0.1 / Q_MAX  0.181962 / Loss  0.0126860085875
loop took 0.28786706924438477 seconds
TIMESTEP 10281 / STATE explore / EPSILON 0.09967432600006285 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.939487 / Loss  0.0110685527325
loop took 0.28475499153137207 seconds
TIMESTEP 10282 / STATE explore / EPSILON 0.09967429270006285 / ACTION 0 / REWARD 0.1 / Q_MAX  0.194703 / Loss  0.0160968154669
loop took 0.26689863204956055 seconds
TIMESTEP 10283 / STATE explore / EPSILON 0.09967425940006286 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.226338 / Loss  0.0102527234703
loop took 0.28278541564941406 seconds
TIMESTEP

TIMESTEP 10328 / STATE explore / EPSILON 0.09967276090006315 / ACTION 0 / REWARD -1 / Q_MAX  0.253387 / Loss  0.0170189812779
loop took 0.2947688102722168 seconds
TIMESTEP 10329 / STATE explore / EPSILON 0.09967272760006315 / ACTION 0 / REWARD 0.1 / Q_MAX  0.177778 / Loss  0.00336490222253
loop took 0.2872428894042969 seconds
TIMESTEP 10330 / STATE explore / EPSILON 0.09967269430006316 / ACTION 0 / REWARD 0.1 / Q_MAX  0.185268 / Loss  0.0193359032273
loop took 0.31026291847229004 seconds
TIMESTEP 10331 / STATE explore / EPSILON 0.09967266100006317 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0335556 / Loss  0.00888604670763
loop took 0.28977465629577637 seconds
TIMESTEP 10332 / STATE explore / EPSILON 0.09967262770006317 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.246781 / Loss  0.0127871539444
loop took 0.29854679107666016 seconds
TIMESTEP 10333 / STATE explore / EPSILON 0.09967259440006318 / ACTION 0 / REWARD 0.1 / Q_MAX  0.212207 / Loss  0.0209293439984
loop took 0.3019371032714844 seconds
TIMESTEP 

TIMESTEP 10378 / STATE explore / EPSILON 0.09967109590006347 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.634719 / Loss  0.00804586615413
loop took 0.28609204292297363 seconds
TIMESTEP 10379 / STATE explore / EPSILON 0.09967106260006348 / ACTION 0 / REWARD -1 / Q_MAX  -0.388569 / Loss  0.0116255022585
loop took 0.26846766471862793 seconds
TIMESTEP 10380 / STATE explore / EPSILON 0.09967102930006348 / ACTION 0 / REWARD 0.1 / Q_MAX  0.391706 / Loss  0.018132917583
loop took 0.2837820053100586 seconds
TIMESTEP 10381 / STATE explore / EPSILON 0.09967099600006349 / ACTION 0 / REWARD 0.1 / Q_MAX  0.130949 / Loss  0.00619469257072
loop took 0.3059060573577881 seconds
TIMESTEP 10382 / STATE explore / EPSILON 0.0996709627000635 / ACTION 0 / REWARD 0.1 / Q_MAX  0.282796 / Loss  0.012015055865
loop took 0.2858750820159912 seconds
TIMESTEP 10383 / STATE explore / EPSILON 0.0996709294000635 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.618634 / Loss  0.0083883292973
loop took 0.28475117683410645 seconds
TIMESTEP 1038

TIMESTEP 10428 / STATE explore / EPSILON 0.09966943090006379 / ACTION 0 / REWARD -1 / Q_MAX  0.132624 / Loss  0.0199912730604
loop took 0.295823335647583 seconds
TIMESTEP 10429 / STATE explore / EPSILON 0.0996693976000638 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.845698 / Loss  0.00816749408841
loop took 0.30461621284484863 seconds
TIMESTEP 10430 / STATE explore / EPSILON 0.0996693643000638 / ACTION 0 / REWARD 0.1 / Q_MAX  0.158103 / Loss  0.00354435155168
loop took 0.2947728633880615 seconds
TIMESTEP 10431 / STATE explore / EPSILON 0.09966933100006381 / ACTION 0 / REWARD 0.1 / Q_MAX  0.155381 / Loss  0.011726738885
loop took 0.27372193336486816 seconds
TIMESTEP 10432 / STATE explore / EPSILON 0.09966929770006382 / ACTION 0 / REWARD 0.1 / Q_MAX  0.155791 / Loss  0.0108624314889
loop took 0.291776180267334 seconds
TIMESTEP 10433 / STATE explore / EPSILON 0.09966926440006382 / ACTION 0 / REWARD 0.1 / Q_MAX  0.388175 / Loss  0.00761093106121
loop took 0.29882335662841797 seconds
TIMESTEP 10434 

TIMESTEP 10478 / STATE explore / EPSILON 0.09966776590006411 / ACTION 0 / REWARD -1 / Q_MAX  0.58455 / Loss  0.458957403898
loop took 0.26458215713500977 seconds
TIMESTEP 10479 / STATE explore / EPSILON 0.09966773260006412 / ACTION 0 / REWARD 0.1 / Q_MAX  0.216089 / Loss  0.0238472428173
loop took 0.2672288417816162 seconds
TIMESTEP 10480 / STATE explore / EPSILON 0.09966769930006412 / ACTION 0 / REWARD 0.1 / Q_MAX  0.567267 / Loss  0.025452889502
loop took 0.2829477787017822 seconds
TIMESTEP 10481 / STATE explore / EPSILON 0.09966766600006413 / ACTION 0 / REWARD 0.1 / Q_MAX  0.00640467 / Loss  0.0103937573731
loop took 0.2849128246307373 seconds
TIMESTEP 10482 / STATE explore / EPSILON 0.09966763270006414 / ACTION 0 / REWARD 0.1 / Q_MAX  0.405345 / Loss  0.0134185682982
loop took 0.3389012813568115 seconds
TIMESTEP 10483 / STATE explore / EPSILON 0.09966759940006414 / ACTION 0 / REWARD 0.1 / Q_MAX  0.171257 / Loss  0.00653387885541
loop took 0.3178439140319824 seconds
TIMESTEP 10484 /

TIMESTEP 10528 / STATE explore / EPSILON 0.09966610090006443 / ACTION 0 / REWARD 0.1 / Q_MAX  0.214428 / Loss  0.0154500985518
loop took 0.3138749599456787 seconds
TIMESTEP 10529 / STATE explore / EPSILON 0.09966606760006444 / ACTION 0 / REWARD 0.1 / Q_MAX  0.219207 / Loss  0.0196679979563
loop took 0.31686949729919434 seconds
TIMESTEP 10530 / STATE explore / EPSILON 0.09966603430006445 / ACTION 0 / REWARD 0.1 / Q_MAX  0.223172 / Loss  0.0149500705302
loop took 0.28502964973449707 seconds
TIMESTEP 10531 / STATE explore / EPSILON 0.09966600100006445 / ACTION 0 / REWARD 0.1 / Q_MAX  0.294187 / Loss  0.00607929332182
loop took 0.2837543487548828 seconds
TIMESTEP 10532 / STATE explore / EPSILON 0.09966596770006446 / ACTION 0 / REWARD 0.1 / Q_MAX  0.231773 / Loss  0.0263393558562
loop took 0.2930300235748291 seconds
TIMESTEP 10533 / STATE explore / EPSILON 0.09966593440006447 / ACTION 0 / REWARD 0.1 / Q_MAX  0.251918 / Loss  0.0061823958531
loop took 0.29701876640319824 seconds
TIMESTEP 105

TIMESTEP 10578 / STATE explore / EPSILON 0.09966443590006475 / ACTION 0 / REWARD 0.1 / Q_MAX  0.586157 / Loss  0.0051559265703
loop took 0.3028092384338379 seconds
TIMESTEP 10579 / STATE explore / EPSILON 0.09966440260006476 / ACTION 0 / REWARD 0.1 / Q_MAX  0.365585 / Loss  0.00777596095577
loop took 0.2875652313232422 seconds
TIMESTEP 10580 / STATE explore / EPSILON 0.09966436930006477 / ACTION 0 / REWARD 0.1 / Q_MAX  0.253309 / Loss  0.0204651318491
loop took 0.2907588481903076 seconds
TIMESTEP 10581 / STATE explore / EPSILON 0.09966433600006477 / ACTION 0 / REWARD -1 / Q_MAX  0.389339 / Loss  0.00297566503286
loop took 0.29599881172180176 seconds
TIMESTEP 10582 / STATE explore / EPSILON 0.09966430270006478 / ACTION 0 / REWARD 0.1 / Q_MAX  0.748692 / Loss  0.0141883054748
loop took 0.29979753494262695 seconds
TIMESTEP 10583 / STATE explore / EPSILON 0.09966426940006479 / ACTION 0 / REWARD 0.1 / Q_MAX  0.249114 / Loss  0.0525372549891
loop took 0.28675389289855957 seconds
TIMESTEP 105

TIMESTEP 10628 / STATE explore / EPSILON 0.09966277090006508 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0705496 / Loss  0.00572970137
loop took 0.29456496238708496 seconds
TIMESTEP 10629 / STATE explore / EPSILON 0.09966273760006508 / ACTION 0 / REWARD 0.1 / Q_MAX  0.248515 / Loss  0.0151303038001
loop took 0.2997910976409912 seconds
TIMESTEP 10630 / STATE explore / EPSILON 0.09966270430006509 / ACTION 0 / REWARD 0.1 / Q_MAX  0.438929 / Loss  0.00333194527775
loop took 0.2998342514038086 seconds
TIMESTEP 10631 / STATE explore / EPSILON 0.0996626710000651 / ACTION 0 / REWARD 0.1 / Q_MAX  0.423334 / Loss  0.0327661707997
loop took 0.328763484954834 seconds
TIMESTEP 10632 / STATE explore / EPSILON 0.0996626377000651 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0786755 / Loss  0.0145430676639
loop took 0.30150485038757324 seconds
TIMESTEP 10633 / STATE explore / EPSILON 0.09966260440006511 / ACTION 0 / REWARD -1 / Q_MAX  -0.865239 / Loss  0.00874684099108
loop took 0.2858257293701172 seconds
TIMESTEP 10634 

TIMESTEP 10678 / STATE explore / EPSILON 0.0996611059000654 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.223245 / Loss  0.0264535211027
loop took 0.28376007080078125 seconds
TIMESTEP 10679 / STATE explore / EPSILON 0.0996610726000654 / ACTION 0 / REWARD 0.1 / Q_MAX  0.328606 / Loss  0.0198539737612
loop took 0.2837519645690918 seconds
TIMESTEP 10680 / STATE explore / EPSILON 0.09966103930006541 / ACTION 0 / REWARD -1 / Q_MAX  0.374121 / Loss  0.0148272914812
loop took 0.28174638748168945 seconds
TIMESTEP 10681 / STATE explore / EPSILON 0.09966100600006542 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.690319 / Loss  0.00858071167022
loop took 0.2837555408477783 seconds
TIMESTEP 10682 / STATE explore / EPSILON 0.09966097270006542 / ACTION 0 / REWARD 0.1 / Q_MAX  0.37931 / Loss  0.00682131759822
loop took 0.2827572822570801 seconds
TIMESTEP 10683 / STATE explore / EPSILON 0.09966093940006543 / ACTION 0 / REWARD 0.1 / Q_MAX  0.194821 / Loss  0.0181773398072
loop took 0.285764217376709 seconds
TIMESTEP 10684 

TIMESTEP 10728 / STATE explore / EPSILON 0.09965944090006572 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.87807 / Loss  0.00718181300908
loop took 0.3299529552459717 seconds
TIMESTEP 10729 / STATE explore / EPSILON 0.09965940760006572 / ACTION 0 / REWARD 0.1 / Q_MAX  0.300007 / Loss  0.00594199309126
loop took 0.3068568706512451 seconds
TIMESTEP 10730 / STATE explore / EPSILON 0.09965937430006573 / ACTION 0 / REWARD -1 / Q_MAX  0.173185 / Loss  0.0115721644834
loop took 0.30129170417785645 seconds
TIMESTEP 10731 / STATE explore / EPSILON 0.09965934100006574 / ACTION 0 / REWARD 0.1 / Q_MAX  0.375998 / Loss  0.013941930607
loop took 0.2957892417907715 seconds
TIMESTEP 10732 / STATE explore / EPSILON 0.09965930770006574 / ACTION 0 / REWARD 0.1 / Q_MAX  0.177758 / Loss  0.00653039524332
loop took 0.3038361072540283 seconds
TIMESTEP 10733 / STATE explore / EPSILON 0.09965927440006575 / ACTION 0 / REWARD 0.1 / Q_MAX  0.278082 / Loss  0.0115843340755
loop took 0.2810947895050049 seconds
TIMESTEP 10734

TIMESTEP 10778 / STATE explore / EPSILON 0.09965777590006604 / ACTION 0 / REWARD -1 / Q_MAX  0.2847 / Loss  0.00809917598963
loop took 0.2717127799987793 seconds
TIMESTEP 10779 / STATE explore / EPSILON 0.09965774260006605 / ACTION 0 / REWARD 0.1 / Q_MAX  0.138115 / Loss  0.00676899217069
loop took 0.31334733963012695 seconds
TIMESTEP 10780 / STATE explore / EPSILON 0.09965770930006605 / ACTION 0 / REWARD 0.1 / Q_MAX  0.14554 / Loss  0.0303895249963
loop took 0.30292749404907227 seconds
TIMESTEP 10781 / STATE explore / EPSILON 0.09965767600006606 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.330641 / Loss  0.0233696363866
loop took 0.3088095188140869 seconds
TIMESTEP 10782 / STATE explore / EPSILON 0.09965764270006607 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.571023 / Loss  0.0165669154376
loop took 0.30480527877807617 seconds
TIMESTEP 10783 / STATE explore / EPSILON 0.09965760940006607 / ACTION 0 / REWARD 0.1 / Q_MAX  0.27093 / Loss  0.0230851899832
loop took 0.29576730728149414 seconds
TIMESTEP 1078

TIMESTEP 10828 / STATE explore / EPSILON 0.09965611090006636 / ACTION 0 / REWARD 0.1 / Q_MAX  0.189801 / Loss  0.0058248299174
loop took 0.29838085174560547 seconds
TIMESTEP 10829 / STATE explore / EPSILON 0.09965607760006637 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.963718 / Loss  0.00420581968501
loop took 0.312298059463501 seconds
TIMESTEP 10830 / STATE explore / EPSILON 0.09965604430006637 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.194278 / Loss  0.0246958658099
loop took 0.29678940773010254 seconds
TIMESTEP 10831 / STATE explore / EPSILON 0.09965601100006638 / ACTION 0 / REWARD 0.1 / Q_MAX  0.181556 / Loss  0.00522096036002
loop took 0.2887706756591797 seconds
TIMESTEP 10832 / STATE explore / EPSILON 0.09965597770006639 / ACTION 0 / REWARD 0.1 / Q_MAX  0.179757 / Loss  0.00781136238948
loop took 0.3268442153930664 seconds
TIMESTEP 10833 / STATE explore / EPSILON 0.09965594440006639 / ACTION 0 / REWARD -1 / Q_MAX  0.179109 / Loss  0.00790254026651
loop took 0.3042924404144287 seconds
TIMESTEP 1

TIMESTEP 10878 / STATE explore / EPSILON 0.09965444590006668 / ACTION 0 / REWARD 0.1 / Q_MAX  0.449052 / Loss  0.0276578795165
loop took 0.2967870235443115 seconds
TIMESTEP 10879 / STATE explore / EPSILON 0.09965441260006669 / ACTION 0 / REWARD 0.1 / Q_MAX  0.599862 / Loss  0.0463918522
loop took 0.31630611419677734 seconds
TIMESTEP 10880 / STATE explore / EPSILON 0.0996543793000667 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.149124 / Loss  0.0145490849391
loop took 1.0229413509368896 seconds
TIMESTEP 10881 / STATE explore / EPSILON 0.0996543460000667 / ACTION 1 / REWARD 0.1 / Q_MAX  0.115487 / Loss  0.0144018838182
loop took 0.27573251724243164 seconds
TIMESTEP 10882 / STATE explore / EPSILON 0.09965431270006671 / ACTION 0 / REWARD 0.1 / Q_MAX  0.474456 / Loss  0.0260834358633
loop took 0.2827601432800293 seconds
TIMESTEP 10883 / STATE explore / EPSILON 0.09965427940006671 / ACTION 0 / REWARD 0.1 / Q_MAX  0.252448 / Loss  0.00802112370729
loop took 0.3104369640350342 seconds
TIMESTEP 10884 / 

TIMESTEP 10928 / STATE explore / EPSILON 0.099652780900067 / ACTION 0 / REWARD 0.1 / Q_MAX  0.187131 / Loss  0.00721513014287
loop took 0.29723024368286133 seconds
TIMESTEP 10929 / STATE explore / EPSILON 0.09965274760006701 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0968744 / Loss  0.00848632305861
loop took 0.2847621440887451 seconds
TIMESTEP 10930 / STATE explore / EPSILON 0.09965271430006702 / ACTION 0 / REWARD 0.1 / Q_MAX  0.101169 / Loss  0.00915371440351
loop took 1.031148910522461 seconds
TIMESTEP 10931 / STATE explore / EPSILON 0.09965268100006702 / ACTION 1 / REWARD -1 / Q_MAX  -0.780682 / Loss  0.0110179372132
loop took 0.2817530632019043 seconds
TIMESTEP 10932 / STATE explore / EPSILON 0.09965264770006703 / ACTION 0 / REWARD 0.1 / Q_MAX  0.182079 / Loss  0.015903763473
loop took 0.26770758628845215 seconds
TIMESTEP 10933 / STATE explore / EPSILON 0.09965261440006704 / ACTION 0 / REWARD 0.1 / Q_MAX  0.229014 / Loss  0.00777050340548
loop took 0.266709566116333 seconds
TIMESTEP 10934

TIMESTEP 10978 / STATE explore / EPSILON 0.09965111590006732 / ACTION 0 / REWARD 0.1 / Q_MAX  0.280996 / Loss  0.005460774526
loop took 0.30085062980651855 seconds
TIMESTEP 10979 / STATE explore / EPSILON 0.09965108260006733 / ACTION 0 / REWARD 0.1 / Q_MAX  0.191393 / Loss  0.0270751398057
loop took 0.3004298210144043 seconds
TIMESTEP 10980 / STATE explore / EPSILON 0.09965104930006734 / ACTION 0 / REWARD 0.1 / Q_MAX  0.188696 / Loss  0.0349936671555
loop took 0.31790733337402344 seconds
TIMESTEP 10981 / STATE explore / EPSILON 0.09965101600006734 / ACTION 0 / REWARD 0.1 / Q_MAX  0.314091 / Loss  0.0104643376544
loop took 0.2977561950683594 seconds
TIMESTEP 10982 / STATE explore / EPSILON 0.09965098270006735 / ACTION 0 / REWARD 0.1 / Q_MAX  0.307625 / Loss  0.0223068334162
loop took 0.30007433891296387 seconds
TIMESTEP 10983 / STATE explore / EPSILON 0.09965094940006736 / ACTION 0 / REWARD 0.1 / Q_MAX  0.410011 / Loss  0.0192468315363
loop took 0.31162071228027344 seconds
TIMESTEP 1098

TIMESTEP 11028 / STATE explore / EPSILON 0.09964945090006765 / ACTION 0 / REWARD 0.1 / Q_MAX  0.119319 / Loss  0.0160688627511
loop took 0.2515420913696289 seconds
TIMESTEP 11029 / STATE explore / EPSILON 0.09964941760006765 / ACTION 0 / REWARD -1 / Q_MAX  0.373724 / Loss  0.0152723742649
loop took 0.28049755096435547 seconds
TIMESTEP 11030 / STATE explore / EPSILON 0.09964938430006766 / ACTION 0 / REWARD 0.1 / Q_MAX  0.124614 / Loss  0.0244243852794
----------Random Action----------
loop took 1.033313274383545 seconds
TIMESTEP 11031 / STATE explore / EPSILON 0.09964935100006767 / ACTION 1 / REWARD 0.1 / Q_MAX  0.126734 / Loss  0.00514376629144
loop took 0.2817513942718506 seconds
TIMESTEP 11032 / STATE explore / EPSILON 0.09964931770006767 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.165341 / Loss  0.0336323156953
loop took 0.2818119525909424 seconds
TIMESTEP 11033 / STATE explore / EPSILON 0.09964928440006768 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.226796 / Loss  0.0154493544251
loop took 0.32153

TIMESTEP 11078 / STATE explore / EPSILON 0.09964778590006797 / ACTION 0 / REWARD -1 / Q_MAX  0.218936 / Loss  0.00632698088884
loop took 0.28463077545166016 seconds
TIMESTEP 11079 / STATE explore / EPSILON 0.09964775260006797 / ACTION 0 / REWARD 0.1 / Q_MAX  0.227954 / Loss  0.00701993191615
loop took 0.29378318786621094 seconds
TIMESTEP 11080 / STATE explore / EPSILON 0.09964771930006798 / ACTION 0 / REWARD 0.1 / Q_MAX  0.255659 / Loss  0.00912578590214
loop took 0.3053317070007324 seconds
TIMESTEP 11081 / STATE explore / EPSILON 0.09964768600006799 / ACTION 0 / REWARD 0.1 / Q_MAX  0.305487 / Loss  0.0170025378466
loop took 0.28318238258361816 seconds
TIMESTEP 11082 / STATE explore / EPSILON 0.09964765270006799 / ACTION 0 / REWARD 0.1 / Q_MAX  0.305669 / Loss  0.0238630175591
loop took 0.29979753494262695 seconds
TIMESTEP 11083 / STATE explore / EPSILON 0.099647619400068 / ACTION 0 / REWARD 0.1 / Q_MAX  0.545676 / Loss  0.00688820099458
loop took 0.2947869300842285 seconds
TIMESTEP 11

TIMESTEP 11128 / STATE explore / EPSILON 0.09964612090006829 / ACTION 0 / REWARD 0.1 / Q_MAX  0.678132 / Loss  0.00706639746204
loop took 0.268718957901001 seconds
TIMESTEP 11129 / STATE explore / EPSILON 0.0996460876000683 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.84855 / Loss  0.00974259246141
loop took 0.299802303314209 seconds
TIMESTEP 11130 / STATE explore / EPSILON 0.0996460543000683 / ACTION 0 / REWARD 0.1 / Q_MAX  0.327032 / Loss  0.0164646282792
loop took 1.0291218757629395 seconds
TIMESTEP 11131 / STATE explore / EPSILON 0.09964602100006831 / ACTION 1 / REWARD 0.1 / Q_MAX  0.523334 / Loss  0.0214828029275
loop took 0.26771068572998047 seconds
TIMESTEP 11132 / STATE explore / EPSILON 0.09964598770006831 / ACTION 0 / REWARD 0.1 / Q_MAX  0.668443 / Loss  0.0186998117715
loop took 0.3027830123901367 seconds
TIMESTEP 11133 / STATE explore / EPSILON 0.09964595440006832 / ACTION 0 / REWARD -1 / Q_MAX  0.35371 / Loss  0.0263613071293
loop took 0.32686376571655273 seconds
TIMESTEP 11134 / S

TIMESTEP 11178 / STATE explore / EPSILON 0.09964445590006861 / ACTION 0 / REWARD 0.1 / Q_MAX  0.456177 / Loss  0.00787749141455
loop took 0.2998499870300293 seconds
TIMESTEP 11179 / STATE explore / EPSILON 0.09964442260006862 / ACTION 0 / REWARD 0.1 / Q_MAX  0.468517 / Loss  0.0147483702749
loop took 0.2967970371246338 seconds
TIMESTEP 11180 / STATE explore / EPSILON 0.09964438930006862 / ACTION 0 / REWARD -1 / Q_MAX  0.28192 / Loss  0.0243996996433
loop took 0.2837710380554199 seconds
TIMESTEP 11181 / STATE explore / EPSILON 0.09964435600006863 / ACTION 0 / REWARD 0.1 / Q_MAX  -1.03255 / Loss  0.00643109343946
loop took 0.30074405670166016 seconds
TIMESTEP 11182 / STATE explore / EPSILON 0.09964432270006864 / ACTION 0 / REWARD 0.1 / Q_MAX  0.405106 / Loss  0.0116695258766
loop took 0.282787561416626 seconds
TIMESTEP 11183 / STATE explore / EPSILON 0.09964428940006864 / ACTION 0 / REWARD 0.1 / Q_MAX  0.652737 / Loss  0.00627924362198
loop took 0.2837538719177246 seconds
TIMESTEP 11184 

TIMESTEP 11228 / STATE explore / EPSILON 0.09964279090006893 / ACTION 0 / REWARD 0.1 / Q_MAX  0.412928 / Loss  0.0145088164136
loop took 0.2998824119567871 seconds
TIMESTEP 11229 / STATE explore / EPSILON 0.09964275760006894 / ACTION 0 / REWARD 0.1 / Q_MAX  0.413218 / Loss  0.027748003602
loop took 0.2827467918395996 seconds
TIMESTEP 11230 / STATE explore / EPSILON 0.09964272430006894 / ACTION 0 / REWARD 0.1 / Q_MAX  0.286362 / Loss  0.0129319150001
loop took 1.0193839073181152 seconds
TIMESTEP 11231 / STATE explore / EPSILON 0.09964269100006895 / ACTION 1 / REWARD 0.1 / Q_MAX  0.401681 / Loss  0.022284720093
loop took 0.2916390895843506 seconds
TIMESTEP 11232 / STATE explore / EPSILON 0.09964265770006896 / ACTION 0 / REWARD 0.1 / Q_MAX  0.395323 / Loss  0.0116705875844
loop took 0.26970911026000977 seconds
TIMESTEP 11233 / STATE explore / EPSILON 0.09964262440006896 / ACTION 0 / REWARD 0.1 / Q_MAX  0.392581 / Loss  0.00697508640587
loop took 0.2774021625518799 seconds
TIMESTEP 11234 /

TIMESTEP 11278 / STATE explore / EPSILON 0.09964112590006925 / ACTION 0 / REWARD 0.1 / Q_MAX  0.292149 / Loss  0.0150649938732
loop took 0.29186248779296875 seconds
TIMESTEP 11279 / STATE explore / EPSILON 0.09964109260006926 / ACTION 0 / REWARD -1 / Q_MAX  0.303002 / Loss  0.00466565648094
loop took 0.2897682189941406 seconds
TIMESTEP 11280 / STATE explore / EPSILON 0.09964105930006927 / ACTION 0 / REWARD 0.1 / Q_MAX  0.317092 / Loss  0.0205842126161
loop took 0.2837543487548828 seconds
TIMESTEP 11281 / STATE explore / EPSILON 0.09964102600006927 / ACTION 0 / REWARD 0.1 / Q_MAX  0.330339 / Loss  0.0105627216399
loop took 0.28275275230407715 seconds
TIMESTEP 11282 / STATE explore / EPSILON 0.09964099270006928 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0438455 / Loss  0.0670745000243
loop took 0.31183409690856934 seconds
TIMESTEP 11283 / STATE explore / EPSILON 0.09964095940006928 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0230845 / Loss  0.0123302340508
loop took 0.31407809257507324 seconds
TIMESTEP

TIMESTEP 11328 / STATE explore / EPSILON 0.09963946090006957 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.915242 / Loss  0.00557433487847
loop took 0.29979729652404785 seconds
TIMESTEP 11329 / STATE explore / EPSILON 0.09963942760006958 / ACTION 0 / REWARD 0.1 / Q_MAX  0.303357 / Loss  0.0287222750485
loop took 0.2707254886627197 seconds
TIMESTEP 11330 / STATE explore / EPSILON 0.09963939430006959 / ACTION 0 / REWARD 0.1 / Q_MAX  0.562633 / Loss  0.0225472338498
loop took 1.0227985382080078 seconds
TIMESTEP 11331 / STATE explore / EPSILON 0.09963936100006959 / ACTION 1 / REWARD 0.1 / Q_MAX  0.303748 / Loss  0.0122952926904
loop took 0.26160120964050293 seconds
TIMESTEP 11332 / STATE explore / EPSILON 0.0996393277000696 / ACTION 0 / REWARD 0.1 / Q_MAX  0.298697 / Loss  0.0108225438744
loop took 0.30683040618896484 seconds
TIMESTEP 11333 / STATE explore / EPSILON 0.0996392944000696 / ACTION 0 / REWARD 0.1 / Q_MAX  0.295475 / Loss  0.0417165756226
loop took 0.2736318111419678 seconds
TIMESTEP 1133

TIMESTEP 11378 / STATE explore / EPSILON 0.0996377959000699 / ACTION 0 / REWARD 0.1 / Q_MAX  0.314755 / Loss  0.00255302712321
loop took 0.2907726764678955 seconds
TIMESTEP 11379 / STATE explore / EPSILON 0.0996377626000699 / ACTION 0 / REWARD 0.1 / Q_MAX  0.304494 / Loss  0.00577989034355
loop took 0.2818450927734375 seconds
TIMESTEP 11380 / STATE explore / EPSILON 0.09963772930006991 / ACTION 0 / REWARD 0.1 / Q_MAX  0.210554 / Loss  0.0107039092109
loop took 0.3100855350494385 seconds
TIMESTEP 11381 / STATE explore / EPSILON 0.09963769600006991 / ACTION 0 / REWARD 0.1 / Q_MAX  0.507798 / Loss  0.00689253862947
loop took 0.29788851737976074 seconds
TIMESTEP 11382 / STATE explore / EPSILON 0.09963766270006992 / ACTION 0 / REWARD 0.1 / Q_MAX  0.491885 / Loss  0.00847885757685
loop took 0.30187439918518066 seconds
TIMESTEP 11383 / STATE explore / EPSILON 0.09963762940006993 / ACTION 0 / REWARD 0.1 / Q_MAX  0.220074 / Loss  0.0036366598215
loop took 0.28869032859802246 seconds
TIMESTEP 11

TIMESTEP 11428 / STATE explore / EPSILON 0.09963613090007022 / ACTION 0 / REWARD 0.1 / Q_MAX  0.190918 / Loss  0.014653804712
loop took 0.28876233100891113 seconds
TIMESTEP 11429 / STATE explore / EPSILON 0.09963609760007022 / ACTION 0 / REWARD 0.1 / Q_MAX  0.158774 / Loss  0.0192305948585
loop took 0.3068575859069824 seconds
TIMESTEP 11430 / STATE explore / EPSILON 0.09963606430007023 / ACTION 0 / REWARD 0.1 / Q_MAX  0.423395 / Loss  0.015208600089
loop took 0.28876686096191406 seconds
TIMESTEP 11431 / STATE explore / EPSILON 0.09963603100007024 / ACTION 0 / REWARD 0.1 / Q_MAX  0.521861 / Loss  0.0256114192307
loop took 0.2847611904144287 seconds
TIMESTEP 11432 / STATE explore / EPSILON 0.09963599770007024 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0822829 / Loss  0.00427209306508
loop took 0.2997913360595703 seconds
TIMESTEP 11433 / STATE explore / EPSILON 0.09963596440007025 / ACTION 0 / REWARD 0.1 / Q_MAX  0.196516 / Loss  0.00859187636524
loop took 0.2834601402282715 seconds
TIMESTEP 114

TIMESTEP 11478 / STATE explore / EPSILON 0.09963446590007054 / ACTION 0 / REWARD 0.1 / Q_MAX  0.243974 / Loss  0.00423098262399
loop took 0.3118326663970947 seconds
TIMESTEP 11479 / STATE explore / EPSILON 0.09963443260007054 / ACTION 0 / REWARD 0.1 / Q_MAX  0.106953 / Loss  0.00317820161581
loop took 0.2907581329345703 seconds
TIMESTEP 11480 / STATE explore / EPSILON 0.09963439930007055 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.854076 / Loss  0.0116740167141
loop took 1.0307974815368652 seconds
TIMESTEP 11481 / STATE explore / EPSILON 0.09963436600007056 / ACTION 1 / REWARD -1 / Q_MAX  0.242629 / Loss  0.0133269764483
loop took 0.2696850299835205 seconds
TIMESTEP 11482 / STATE explore / EPSILON 0.09963433270007056 / ACTION 0 / REWARD 0.1 / Q_MAX  0.193719 / Loss  0.00863459520042
loop took 0.2791321277618408 seconds
TIMESTEP 11483 / STATE explore / EPSILON 0.09963429940007057 / ACTION 0 / REWARD 0.1 / Q_MAX  0.300055 / Loss  0.021966535598
loop took 0.2687106132507324 seconds
TIMESTEP 11484

TIMESTEP 11528 / STATE explore / EPSILON 0.09963280090007086 / ACTION 0 / REWARD 0.1 / Q_MAX  0.50628 / Loss  0.00943913124502
loop took 0.3226938247680664 seconds
TIMESTEP 11529 / STATE explore / EPSILON 0.09963276760007087 / ACTION 0 / REWARD 0.1 / Q_MAX  0.484876 / Loss  0.0041247732006
loop took 0.28125715255737305 seconds
TIMESTEP 11530 / STATE explore / EPSILON 0.09963273430007087 / ACTION 0 / REWARD 0.1 / Q_MAX  0.337392 / Loss  0.00908220000565
----------Random Action----------
loop took 0.28575968742370605 seconds
TIMESTEP 11531 / STATE explore / EPSILON 0.09963270100007088 / ACTION 0 / REWARD 0.1 / Q_MAX  0.432265 / Loss  0.0235818512738
loop took 0.28278279304504395 seconds
TIMESTEP 11532 / STATE explore / EPSILON 0.09963266770007088 / ACTION 0 / REWARD 0.1 / Q_MAX  0.21896 / Loss  0.0100739058107
loop took 0.2998208999633789 seconds
TIMESTEP 11533 / STATE explore / EPSILON 0.09963263440007089 / ACTION 0 / REWARD 0.1 / Q_MAX  0.540184 / Loss  0.00596299953759
loop took 0.282

TIMESTEP 11578 / STATE explore / EPSILON 0.09963113590007118 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0598381 / Loss  0.0135484356433
loop took 0.3096005916595459 seconds
TIMESTEP 11579 / STATE explore / EPSILON 0.09963110260007119 / ACTION 0 / REWARD -1 / Q_MAX  0.375549 / Loss  0.00636349897832
loop took 0.311281681060791 seconds
TIMESTEP 11580 / STATE explore / EPSILON 0.09963106930007119 / ACTION 0 / REWARD 0.1 / Q_MAX  0.286207 / Loss  0.0264227855951
loop took 0.3068075180053711 seconds
TIMESTEP 11581 / STATE explore / EPSILON 0.0996310360000712 / ACTION 0 / REWARD 0.1 / Q_MAX  0.430376 / Loss  0.0201414003968
loop took 0.29869675636291504 seconds
TIMESTEP 11582 / STATE explore / EPSILON 0.0996310027000712 / ACTION 0 / REWARD 0.1 / Q_MAX  0.385549 / Loss  0.0173395108432
loop took 0.2987983226776123 seconds
TIMESTEP 11583 / STATE explore / EPSILON 0.09963096940007121 / ACTION 0 / REWARD 0.1 / Q_MAX  0.178417 / Loss  0.00448429957032
loop took 0.2856006622314453 seconds
TIMESTEP 11584 

TIMESTEP 11628 / STATE explore / EPSILON 0.0996294709000715 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.451783 / Loss  0.0135713266209
loop took 0.35494160652160645 seconds
TIMESTEP 11629 / STATE explore / EPSILON 0.09962943760007151 / ACTION 0 / REWARD 0.1 / Q_MAX  0.387419 / Loss  0.0239060558379
loop took 0.36397242546081543 seconds
TIMESTEP 11630 / STATE explore / EPSILON 0.09962940430007151 / ACTION 0 / REWARD 0.1 / Q_MAX  0.155528 / Loss  0.00375569565222
loop took 1.0502583980560303 seconds
TIMESTEP 11631 / STATE explore / EPSILON 0.09962937100007152 / ACTION 1 / REWARD 0.1 / Q_MAX  0.393311 / Loss  0.0169861596078
loop took 0.2848508358001709 seconds
TIMESTEP 11632 / STATE explore / EPSILON 0.09962933770007153 / ACTION 0 / REWARD -1 / Q_MAX  0.108431 / Loss  0.00270098214969
loop took 0.27834320068359375 seconds
TIMESTEP 11633 / STATE explore / EPSILON 0.09962930440007153 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0831148 / Loss  0.00955524668097
loop took 0.28876805305480957 seconds
TIMESTEP 

TIMESTEP 11678 / STATE explore / EPSILON 0.09962780590007182 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.0291828 / Loss  0.008112099953
loop took 0.2998025417327881 seconds
TIMESTEP 11679 / STATE explore / EPSILON 0.09962777260007183 / ACTION 0 / REWARD 0.1 / Q_MAX  0.359578 / Loss  0.01234126091
loop took 0.29979777336120605 seconds
TIMESTEP 11680 / STATE explore / EPSILON 0.09962773930007184 / ACTION 0 / REWARD 0.1 / Q_MAX  0.439484 / Loss  0.0548122636974
loop took 1.0399291515350342 seconds
TIMESTEP 11681 / STATE explore / EPSILON 0.09962770600007184 / ACTION 1 / REWARD 0.1 / Q_MAX  0.329181 / Loss  0.0105774402618
loop took 0.28756093978881836 seconds
TIMESTEP 11682 / STATE explore / EPSILON 0.09962767270007185 / ACTION 0 / REWARD 0.1 / Q_MAX  0.179102 / Loss  0.00537047535181
loop took 0.28348803520202637 seconds
TIMESTEP 11683 / STATE explore / EPSILON 0.09962763940007185 / ACTION 0 / REWARD -1 / Q_MAX  0.272426 / Loss  0.023372201249
loop took 0.2687110900878906 seconds
TIMESTEP 11684 

TIMESTEP 11728 / STATE explore / EPSILON 0.09962614090007214 / ACTION 0 / REWARD 0.1 / Q_MAX  0.182255 / Loss  0.0221769735217
loop took 0.28878259658813477 seconds
TIMESTEP 11729 / STATE explore / EPSILON 0.09962610760007215 / ACTION 0 / REWARD 0.1 / Q_MAX  0.515451 / Loss  0.0101163061336
loop took 0.29993534088134766 seconds
TIMESTEP 11730 / STATE explore / EPSILON 0.09962607430007216 / ACTION 0 / REWARD 0.1 / Q_MAX  0.249818 / Loss  0.0148909687996
loop took 0.3109402656555176 seconds
TIMESTEP 11731 / STATE explore / EPSILON 0.09962604100007216 / ACTION 0 / REWARD 0.1 / Q_MAX  0.113142 / Loss  0.0121621582657
loop took 0.32138895988464355 seconds
TIMESTEP 11732 / STATE explore / EPSILON 0.09962600770007217 / ACTION 0 / REWARD 0.1 / Q_MAX  0.51554 / Loss  0.018685773015
loop took 0.2945132255554199 seconds
TIMESTEP 11733 / STATE explore / EPSILON 0.09962597440007218 / ACTION 0 / REWARD -1 / Q_MAX  0.555829 / Loss  0.0102226380259
loop took 0.2877631187438965 seconds
TIMESTEP 11734 /

TIMESTEP 11778 / STATE explore / EPSILON 0.09962447590007247 / ACTION 0 / REWARD 0.1 / Q_MAX  0.272882 / Loss  0.0220252443105
loop took 0.3002619743347168 seconds
TIMESTEP 11779 / STATE explore / EPSILON 0.09962444260007247 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.981139 / Loss  0.0243025887758
loop took 0.29793286323547363 seconds
TIMESTEP 11780 / STATE explore / EPSILON 0.09962440930007248 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.862473 / Loss  0.00705604255199
loop took 0.31487488746643066 seconds
TIMESTEP 11781 / STATE explore / EPSILON 0.09962437600007248 / ACTION 0 / REWARD 0.1 / Q_MAX  0.486467 / Loss  0.0247321482748
loop took 0.3071739673614502 seconds
TIMESTEP 11782 / STATE explore / EPSILON 0.09962434270007249 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.912129 / Loss  0.013371582143
loop took 0.32689404487609863 seconds
TIMESTEP 11783 / STATE explore / EPSILON 0.0996243094000725 / ACTION 0 / REWARD -1 / Q_MAX  0.523216 / Loss  0.017275871709
loop took 0.32266831398010254 seconds
TIMESTEP 117

TIMESTEP 11828 / STATE explore / EPSILON 0.09962281090007279 / ACTION 0 / REWARD 0.1 / Q_MAX  0.419867 / Loss  0.00814326573163
loop took 0.29937267303466797 seconds
TIMESTEP 11829 / STATE explore / EPSILON 0.09962277760007279 / ACTION 0 / REWARD 0.1 / Q_MAX  0.439954 / Loss  0.0084927175194
loop took 0.30462121963500977 seconds
TIMESTEP 11830 / STATE explore / EPSILON 0.0996227443000728 / ACTION 0 / REWARD 0.1 / Q_MAX  0.297854 / Loss  0.0337952598929
loop took 1.0217196941375732 seconds
TIMESTEP 11831 / STATE explore / EPSILON 0.0996227110000728 / ACTION 1 / REWARD 0.1 / Q_MAX  0.133778 / Loss  0.00622768281028
loop took 0.2775914669036865 seconds
TIMESTEP 11832 / STATE explore / EPSILON 0.09962267770007281 / ACTION 0 / REWARD 0.1 / Q_MAX  0.142701 / Loss  0.00680278101936
loop took 0.2818019390106201 seconds
TIMESTEP 11833 / STATE explore / EPSILON 0.09962264440007282 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.375649 / Loss  0.00823467131704
loop took 0.3066740036010742 seconds
TIMESTEP 11

TIMESTEP 11878 / STATE explore / EPSILON 0.09962114590007311 / ACTION 0 / REWARD -1 / Q_MAX  0.400044 / Loss  0.00875490903854
loop took 0.34048008918762207 seconds
TIMESTEP 11879 / STATE explore / EPSILON 0.09962111260007311 / ACTION 0 / REWARD 0.1 / Q_MAX  0.237202 / Loss  0.00900947675109
loop took 0.3669753074645996 seconds
TIMESTEP 11880 / STATE explore / EPSILON 0.09962107930007312 / ACTION 0 / REWARD 0.1 / Q_MAX  0.240835 / Loss  0.0161443427205
loop took 0.3699667453765869 seconds
TIMESTEP 11881 / STATE explore / EPSILON 0.09962104600007313 / ACTION 0 / REWARD 0.1 / Q_MAX  -0.200969 / Loss  0.0115622617304
loop took 0.2871057987213135 seconds
TIMESTEP 11882 / STATE explore / EPSILON 0.09962101270007313 / ACTION 0 / REWARD 0.1 / Q_MAX  0.0376615 / Loss  0.0127065209672
loop took 0.3145134449005127 seconds
TIMESTEP 11883 / STATE explore / EPSILON 0.09962097940007314 / ACTION 0 / REWARD 0.1 / Q_MAX  0.238067 / Loss  0.0130870044231
loop took 0.28577494621276855 seconds
TIMESTEP 11

StopIteration: 

In [None]:
# # import cv2
# # image = cv2.imread('screenshot.png')
# # image = image[10:110,0:200] #img[y:y+h, x:x+w]
# from matplotlib import pyplot as plt
# plt.imshow(image)
# plt.title('my picture')
# plt.show()