In [1]:
import numpy as np
from PIL import Image
import cv2 #opencv
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

#keras imports
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD , Adam
from keras.callbacks import TensorBoard
from collections import deque
import random
import pickle
from io import BytesIO
import base64
import json
import _pickle as cPickle

Using TensorFlow backend.


In [2]:
#path variables
game_url = "chrome://dino"
chrome_driver_path = "F:/DinoRunTutorial-master (1)/DinoRunTutorial-master/chromedriver.exe"
loss_file_path = "./Object2/loss_df.csv"
actions_file_path = "./Object2/actions_df.csv"
q_value_file_path = "./Object2/q_values.csv"
scores_file_path = "./Object2/scores_df.csv"

#scripts
#create id for canvas for faster selection from DOM
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

#get image from canvas
getbase64Script = "canvasRunner = document.getElementById('runner-canvas'); \
return canvasRunner.toDataURL().substring(22)"

In [3]:
'''
* Game class: Selenium interfacing between the python and browser
* __init__():  Launch the broswer window using the attributes in chrome_options
* get_crashed() : return true if the agent as crashed on an obstacles. Gets javascript variable from game decribing the state
* get_playing(): true if game in progress, false is crashed or paused
* restart() : sends a signal to browser-javascript to restart the game
* press_up(): sends a single to press up get to the browser
* get_score(): gets current game score from javascript variables.
* pause(): pause the game
* resume(): resume a paused game if not crashed
* end(): close the browser and end the game
'''
class Game:
    def __init__(self,custom_config=True):
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path,chrome_options=chrome_options)
        self._driver.set_window_position(x=-10,y=0)
        self._driver.get('chrome://dino')
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)
    def get_crashed(self):
        return self._driver.execute_script("return Runner.instance_.crashed")
    def get_playing(self):
        return self._driver.execute_script("return Runner.instance_.playing")
    def restart(self):
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)
    def get_score(self):
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        score = ''.join(score_array) # the javascript object is of type array with score in the formate[1,0,0] which is 100.
        return int(score)
    def pause(self):
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        self._driver.close()

In [4]:
class DinoAgent:
    def __init__(self,game): #takes game as input for taking actions
        self._game = game; 
        self.jump(); #to start the game, we need to jump once
    def is_running(self):
        return self._game.get_playing()
    def is_crashed(self):
        return self._game.get_crashed()
    def jump(self):
        self._game.press_up()
    def duck(self):
        self._game.press_down()

In [5]:
class Game_sate:
    def __init__(self,agent,game):
        self._agent = agent
        self._game = game
        self._display = show_img() #display the processed image on screen using openCV, implemented using python coroutine 
        self._display.__next__() # initiliaze the display coroutine 
    def get_state(self,actions):
        actions_df.loc[len(actions_df)] = actions[1] # storing actions in a dataframe
        score = self._game.get_score() 
        reward = 0.1
        is_over = False #game over
        if actions[1] == 1:
            self._agent.jump()
        image = grab_screen(self._game._driver) 
        self._display.send(image) #display the image on screen
        if self._agent.is_crashed():
            scores_df.loc[len(loss_df)] = score # log the score when game is over
            self._game.restart()
            reward = -1
            is_over = True
        return image, reward, is_over #return the Experience tuple

In [6]:
def save_obj(obj, name ):
    with open('Object2'+ name + '.pkl', 'wb') as f: #dump files into objects folder
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name ):
    with open('Object2' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

def grab_screen(_driver):
    image_b64 = _driver.execute_script(getbase64Script)
    screen = np.array(Image.open(BytesIO(base64.b64decode(image_b64))))
    image = process_img(screen)#processing image as required
    return image

def process_img(image):
    
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #RGB to Grey Scale
    image = image[:300, :500] #Crop Region of Interest(ROI)
    image = cv2.resize(image, (80,80))
    return  image

def show_img(graphs = False):
    """
    Show images in new window
    """
    while True:
        screen = (yield)
        window_title = "logs" if graphs else "game_play"
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)        
        imS = cv2.resize(screen, (800, 400)) 
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

In [7]:
#Intialize log structures from file if exists else create new
loss_df = pd.read_csv(loss_file_path) if os.path.isfile(loss_file_path) else pd.DataFrame(columns =['loss'])
scores_df = pd.read_csv(scores_file_path) if os.path.isfile(loss_file_path) else pd.DataFrame(columns = ['scores'])
actions_df = pd.read_csv(actions_file_path) if os.path.isfile(actions_file_path) else pd.DataFrame(columns = ['actions'])
q_values_df =pd.read_csv(actions_file_path) if os.path.isfile(q_value_file_path) else pd.DataFrame(columns = ['qvalues'])

In [8]:
#game parameters
ACTIONS = 2 # possible actions: jump, do nothing
GAMMA = 0.99 # decay rate of past observations original 0.99
OBSERVATION = 100. # timesteps to observe before training
EXPLORE = 100000  # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.1 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 16 # size of minibatch
FRAME_PER_ACTION = 1
LEARNING_RATE = 1e-4
img_rows , img_cols = 80,80
img_channels = 4 #We stack 4 frames

In [9]:
# training variables saved as checkpoints to filesystem to resume training from the same step
def init_cache():
    """initial variable caching, done only once"""
    save_obj(INITIAL_EPSILON,"epsilon")
    t = 0
    save_obj(t,"time")
    D = deque()
    save_obj(D,"D")

In [10]:
'''Call only once to init file structure
'''
#init_cache()

'Call only once to init file structure\n'

In [11]:
def buildmodel():
    print("Now we build the model")
    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(img_cols,img_rows,img_channels)))  #80*80*4
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(ACTIONS))
    adam = Adam(lr=LEARNING_RATE)
    model.compile(loss='mse',optimizer=adam)
    
    #create model file if not present
    if not os.path.isfile(loss_file_path):
        model.save_weights('model.h5')
    print("We finish building the model")
    return model

In [12]:
''' 
main training module
Parameters:
* model => Keras Model to be trained
* game_state => Game State module with access to game environment and dino
* observe => flag to indicate wherther the model is to be trained(weight updates), else just play
'''
def trainNetwork(model,game_state,observe=False):
    last_time = time.time()
    # store the previous observations in replay memory
    D = load_obj("D") #load from file system
    # get the first state by doing nothing
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] =1 #0 => do nothing,
                     #1=> jump
    
    x_t, r_0, terminal = game_state.get_state(do_nothing) # get next step after performing the action
    

    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2) # stack 4 images to create placeholder input
    

    
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  #1*20*40*4
    
    initial_state = s_t 

    if observe :
        OBSERVE = 999999999    #We keep observe, never train
        epsilon = FINAL_EPSILON
        print ("Now we load weight")
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)
        print ("Weight load successfully")    
    else:                       #We go to training mode
        OBSERVE = OBSERVATION
        epsilon = load_obj("epsilon") 
        model.load_weights("model.h5")
        adam = Adam(lr=LEARNING_RATE)
        model.compile(loss='mse',optimizer=adam)

    t = load_obj("time") # resume from the previous time step stored in file system
    while (True): #endless running
        
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0 #reward at 4
        a_t = np.zeros([ACTIONS]) # action at t
        
        #choose an action epsilon greedy
        if t % FRAME_PER_ACTION == 0: #parameter to skip frames for actions
            if  random.random() <= epsilon: #randomly explore an action
                print("----------Random Action----------")
                action_index = random.randrange(ACTIONS)
                a_t[action_index] = 1
            else: # predict the output
                q = model.predict(s_t)       #input a stack of 4 images, get the prediction
                new_q=np.argmax(q)
                max_Q = epsilon*(sum(q)/len(q))+(1-epsilon)*new_q  # chosing index with maximum q value
                print(new_q)
                
                a_t[action_index] = 1        # o=> do nothing, 1=> jump
                
        #We reduced the epsilon (exploration parameter) gradually
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE 

        #run the selected action and observed next state and reward
        x_t1, r_t, terminal = game_state.get_state(a_t)
        print('fps: {0}'.format(1 / (time.time()-last_time))) # helpful for measuring frame rate
        last_time = time.time()
        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1) #1x20x40x1
        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3) # append the new image to input stack and remove the first one
        
        
        # store the transition in D
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        #only train if done observing
        if t > OBSERVE: 
            
            #sample a minibatch to train on
            minibatch = random.sample(D, BATCH)
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))   #32, 20, 40, 4
            targets = np.zeros((inputs.shape[0], ACTIONS))                         #32, 2

            #Now we do the experience replay
            for i in range(0, len(minibatch)):
                state_t = minibatch[i][0]    # 4D stack of images
                action_t = minibatch[i][1]   #This is action index
                reward_t = minibatch[i][2]   #reward at state_t due to action_t
                state_t1 = minibatch[i][3]   #next state
                terminal = minibatch[i][4]   #wheather the agent died or survided due the action
                

                inputs[i:i + 1] = state_t    

                targets[i] = model.predict(state_t)  # predicted q values
                Q_sa = model.predict(state_t1)      #predict q values for next step
                
                if terminal:
                    targets[i, action_t] = reward_t # if terminated, only equals reward
                else:
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            loss += model.train_on_batch(inputs, targets)
            loss_df.loc[len(loss_df)] = loss
            q_values_df.loc[len(q_values_df)] = np.max(Q_sa)
        s_t = initial_state if terminal else s_t1 #reset game to initial frame if terminate
        t = t + 1
        
        # save progress every 1000 iterations
        if t % 1000 == 0:
            print("Now we save model")
            game_state._game.pause() #pause game while saving to filesystem
            model.save_weights("model.h5", overwrite=True)
            save_obj(D,"D") #saving episodes
            save_obj(t,"time") #caching time steps
            save_obj(epsilon,"epsilon") #cache epsilon to avoid repeated randomness in actions
            loss_df.to_csv("./Object2/loss_df.csv",index=False)
            scores_df.to_csv("./Object2/scores_df.csv",index=False)
            actions_df.to_csv("./Object2/actions_df.csv",index=False)
            q_values_df.to_csv(q_value_file_path,index=False)
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            clear_output()
            game_state._game.resume()
        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"

        print("TIMESTEP", t, "/ STATE", state,             "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t,             "/ Q_MAX " , np.max(Q_sa), "/ Loss ", loss)

    print("Episode finished!")
    print("************************")


In [13]:
#main function
def playGame(observe=False):
    game = Game()
    dino = DinoAgent(game)
    game_state = Game_sate(dino,game)    
    model = buildmodel()
    try:
        trainNetwork(model,game_state,observe=observe)
    except StopIteration:
        game.end()

In [None]:
playGame(observe=False);

TIMESTEP 18000 / STATE explore / EPSILON 0.08211889899997268 / ACTION 0 / REWARD 0.1 / Q_MAX  20.652012 / Loss  0.17089301347732544
0
fps: 0.2523085703863422
TIMESTEP 18001 / STATE explore / EPSILON 0.08211789999997268 / ACTION 0 / REWARD 0.1 / Q_MAX  11.521734 / Loss  0.14950916171073914
0
fps: 6.89727845291148
TIMESTEP 18002 / STATE explore / EPSILON 0.08211690099997268 / ACTION 0 / REWARD 0.1 / Q_MAX  13.135135 / Loss  0.08257408440113068
0
fps: 5.174939512870402
TIMESTEP 18003 / STATE explore / EPSILON 0.08211590199997268 / ACTION 0 / REWARD 0.1 / Q_MAX  9.512302 / Loss  0.3345215916633606
0
fps: 7.443334315294819
TIMESTEP 18004 / STATE explore / EPSILON 0.08211490299997268 / ACTION 0 / REWARD 0.1 / Q_MAX  0.036021717 / Loss  0.5101460814476013
0
fps: 6.80787409754324
TIMESTEP 18005 / STATE explore / EPSILON 0.08211390399997268 / ACTION 0 / REWARD 0.1 / Q_MAX  3.9480264 / Loss  0.16185492277145386
0
fps: 6.720424025777383
TIMESTEP 18006 / STATE explore / EPSILON 0.08211290499997267

fps: 6.831080629899202
TIMESTEP 18052 / STATE explore / EPSILON 0.0820669509999726 / ACTION 0 / REWARD 0.1 / Q_MAX  10.5789795 / Loss  0.5769277811050415
----------Random Action----------
fps: 7.063472335896478
TIMESTEP 18053 / STATE explore / EPSILON 0.0820659519999726 / ACTION 0 / REWARD 0.1 / Q_MAX  13.310949 / Loss  6.291820526123047
0
fps: 7.099856286573713
TIMESTEP 18054 / STATE explore / EPSILON 0.0820649529999726 / ACTION 0 / REWARD 0.1 / Q_MAX  7.592062 / Loss  0.47139772772789
0
fps: 7.006317590028163
TIMESTEP 18055 / STATE explore / EPSILON 0.0820639539999726 / ACTION 0 / REWARD 0.1 / Q_MAX  13.577353 / Loss  0.07798435539007187
0
fps: 7.1318358810426625
TIMESTEP 18056 / STATE explore / EPSILON 0.0820629549999726 / ACTION 0 / REWARD 0.1 / Q_MAX  12.962765 / Loss  0.1458704173564911
0
fps: 6.0643403881243545
TIMESTEP 18057 / STATE explore / EPSILON 0.0820619559999726 / ACTION 0 / REWARD 0.1 / Q_MAX  12.881995 / Loss  0.09836426377296448
0
fps: 6.9440624622317735
TIMESTEP 1805

TIMESTEP 18104 / STATE explore / EPSILON 0.08201500299997252 / ACTION 0 / REWARD 0.1 / Q_MAX  24.716526 / Loss  0.2213757336139679
0
fps: 7.0672570785396065
TIMESTEP 18105 / STATE explore / EPSILON 0.08201400399997252 / ACTION 0 / REWARD 0.1 / Q_MAX  13.609979 / Loss  0.2640796899795532
0
fps: 7.097933896185433
TIMESTEP 18106 / STATE explore / EPSILON 0.08201300499997252 / ACTION 0 / REWARD 0.1 / Q_MAX  16.114012 / Loss  0.1300048828125
0
fps: 6.893877483933531
TIMESTEP 18107 / STATE explore / EPSILON 0.08201200599997252 / ACTION 0 / REWARD 0.1 / Q_MAX  11.040898 / Loss  0.6688874363899231
0
fps: 6.752644755165946
TIMESTEP 18108 / STATE explore / EPSILON 0.08201100699997252 / ACTION 0 / REWARD 0.1 / Q_MAX  12.930067 / Loss  0.016799315810203552
0
fps: 7.014355475282628
TIMESTEP 18109 / STATE explore / EPSILON 0.08201000799997252 / ACTION 0 / REWARD 0.1 / Q_MAX  15.114217 / Loss  0.07132730633020401
0
fps: 5.827478026970626
TIMESTEP 18110 / STATE explore / EPSILON 0.08200900899997252 / 

TIMESTEP 18157 / STATE explore / EPSILON 0.08196205599997244 / ACTION 0 / REWARD 0.1 / Q_MAX  36.30782 / Loss  0.705180823802948
0
fps: 7.366169009186847
TIMESTEP 18158 / STATE explore / EPSILON 0.08196105699997244 / ACTION 0 / REWARD 0.1 / Q_MAX  10.2417345 / Loss  0.4532340466976166
0
fps: 6.974268245435212
TIMESTEP 18159 / STATE explore / EPSILON 0.08196005799997244 / ACTION 0 / REWARD 0.1 / Q_MAX  14.578745 / Loss  6.273584365844727
0
fps: 7.011283436583743
TIMESTEP 18160 / STATE explore / EPSILON 0.08195905899997244 / ACTION 0 / REWARD 0.1 / Q_MAX  13.944266 / Loss  0.19953016936779022
0
fps: 6.732754386246529
TIMESTEP 18161 / STATE explore / EPSILON 0.08195805999997244 / ACTION 0 / REWARD 0.1 / Q_MAX  11.633662 / Loss  4.513568878173828
0
fps: 5.548241332328002
TIMESTEP 18162 / STATE explore / EPSILON 0.08195706099997244 / ACTION 0 / REWARD 0.1 / Q_MAX  14.59433 / Loss  0.8621352910995483
0
fps: 7.5360541895376105
TIMESTEP 18163 / STATE explore / EPSILON 0.08195606199997243 / ACT

TIMESTEP 18209 / STATE explore / EPSILON 0.08191010799997236 / ACTION 0 / REWARD 0.1 / Q_MAX  13.724922 / Loss  2.1949450969696045
0
fps: 7.014320284030951
TIMESTEP 18210 / STATE explore / EPSILON 0.08190910899997236 / ACTION 0 / REWARD 0.1 / Q_MAX  17.748644 / Loss  0.16236063838005066
0
fps: 6.941315874833679
TIMESTEP 18211 / STATE explore / EPSILON 0.08190810999997236 / ACTION 0 / REWARD 0.1 / Q_MAX  15.782829 / Loss  0.5364112854003906
0
fps: 6.740447305703895
TIMESTEP 18212 / STATE explore / EPSILON 0.08190711099997236 / ACTION 0 / REWARD 0.1 / Q_MAX  14.751427 / Loss  0.3029039800167084
0
fps: 6.665973735360634
TIMESTEP 18213 / STATE explore / EPSILON 0.08190611199997236 / ACTION 0 / REWARD 0.1 / Q_MAX  24.781258 / Loss  0.146317720413208
0
fps: 6.669121692314053
TIMESTEP 18214 / STATE explore / EPSILON 0.08190511299997236 / ACTION 0 / REWARD 0.1 / Q_MAX  15.90839 / Loss  0.11872511357069016
0
fps: 6.028542230513941
TIMESTEP 18215 / STATE explore / EPSILON 0.08190411399997236 / A

TIMESTEP 18261 / STATE explore / EPSILON 0.08185815999997229 / ACTION 0 / REWARD 0.1 / Q_MAX  12.44299 / Loss  0.17751525342464447
0
fps: 6.838029994505844
TIMESTEP 18262 / STATE explore / EPSILON 0.08185716099997228 / ACTION 0 / REWARD 0.1 / Q_MAX  19.072922 / Loss  1.5753135681152344
0
fps: 6.879866939829312
TIMESTEP 18263 / STATE explore / EPSILON 0.08185616199997228 / ACTION 0 / REWARD 0.1 / Q_MAX  19.095852 / Loss  0.25736644864082336
----------Random Action----------
fps: 7.077023276218437
TIMESTEP 18264 / STATE explore / EPSILON 0.08185516299997228 / ACTION 0 / REWARD 0.1 / Q_MAX  19.119055 / Loss  0.11445776373147964
0
fps: 6.935943907924329
TIMESTEP 18265 / STATE explore / EPSILON 0.08185416399997228 / ACTION 0 / REWARD 0.1 / Q_MAX  24.960066 / Loss  0.5306566953659058
0
fps: 7.091129798507488
TIMESTEP 18266 / STATE explore / EPSILON 0.08185316499997228 / ACTION 0 / REWARD 0.1 / Q_MAX  14.627805 / Loss  0.5652918815612793
0
fps: 6.960576189053736
TIMESTEP 18267 / STATE explore

TIMESTEP 18313 / STATE explore / EPSILON 0.0818062119999722 / ACTION 0 / REWARD 0.1 / Q_MAX  14.604901 / Loss  0.7716066837310791
1
fps: 7.304515618931848
TIMESTEP 18314 / STATE explore / EPSILON 0.0818052129999722 / ACTION 0 / REWARD 0.1 / Q_MAX  14.380703 / Loss  0.3606396019458771
1
fps: 7.104690890427317
TIMESTEP 18315 / STATE explore / EPSILON 0.0818042139999722 / ACTION 0 / REWARD 0.1 / Q_MAX  16.519848 / Loss  0.10707289725542068
1
fps: 6.842793562954358
TIMESTEP 18316 / STATE explore / EPSILON 0.0818032149999722 / ACTION 0 / REWARD 0.1 / Q_MAX  21.799673 / Loss  0.08986564725637436
1
fps: 6.948583545939498
TIMESTEP 18317 / STATE explore / EPSILON 0.0818022159999722 / ACTION 0 / REWARD 0.1 / Q_MAX  25.875315 / Loss  0.30134323239326477
1
fps: 7.009221287504303
TIMESTEP 18318 / STATE explore / EPSILON 0.0818012169999722 / ACTION 0 / REWARD 0.1 / Q_MAX  20.584118 / Loss  0.22606973350048065
1
fps: 6.880611828085757
TIMESTEP 18319 / STATE explore / EPSILON 0.0818002179999722 / ACTI

TIMESTEP 18366 / STATE explore / EPSILON 0.08175326499997213 / ACTION 0 / REWARD 0.1 / Q_MAX  26.014034 / Loss  0.2541867792606354
----------Random Action----------
fps: 5.327087942653495
TIMESTEP 18367 / STATE explore / EPSILON 0.08175226599997212 / ACTION 1 / REWARD 0.1 / Q_MAX  3.7711565 / Loss  0.8160411715507507
1
fps: 7.496066369514632
TIMESTEP 18368 / STATE explore / EPSILON 0.08175126699997212 / ACTION 0 / REWARD 0.1 / Q_MAX  11.027028 / Loss  0.1955544799566269
1
fps: 7.512231117797718
TIMESTEP 18369 / STATE explore / EPSILON 0.08175026799997212 / ACTION 0 / REWARD 0.1 / Q_MAX  39.203533 / Loss  1.3413581848144531
1
fps: 6.818398914077169
TIMESTEP 18370 / STATE explore / EPSILON 0.08174926899997212 / ACTION 0 / REWARD 0.1 / Q_MAX  16.664759 / Loss  0.6169121265411377
0
fps: 7.502166956727046
TIMESTEP 18371 / STATE explore / EPSILON 0.08174826999997212 / ACTION 0 / REWARD 0.1 / Q_MAX  4.7436657 / Loss  0.7568023204803467
0
fps: 7.194877152174174
TIMESTEP 18372 / STATE explore /

fps: 7.005533536101605
TIMESTEP 18419 / STATE explore / EPSILON 0.08170031799997204 / ACTION 0 / REWARD 0.1 / Q_MAX  9.967422 / Loss  0.0034199252258986235
0
fps: 6.991368935502045
TIMESTEP 18420 / STATE explore / EPSILON 0.08169931899997204 / ACTION 0 / REWARD 0.1 / Q_MAX  19.93508 / Loss  0.5373921394348145
0
fps: 6.654699441674057
TIMESTEP 18421 / STATE explore / EPSILON 0.08169831999997204 / ACTION 0 / REWARD 0.1 / Q_MAX  10.0360565 / Loss  0.2602693736553192
0
fps: 6.964424895433269
TIMESTEP 18422 / STATE explore / EPSILON 0.08169732099997204 / ACTION 0 / REWARD 0.1 / Q_MAX  12.131873 / Loss  0.2029573619365692
0
fps: 7.070592915759166
TIMESTEP 18423 / STATE explore / EPSILON 0.08169632199997204 / ACTION 0 / REWARD 0.1 / Q_MAX  17.065212 / Loss  0.03620879352092743
0
fps: 7.116395706718517
TIMESTEP 18424 / STATE explore / EPSILON 0.08169532299997204 / ACTION 0 / REWARD 0.1 / Q_MAX  15.922166 / Loss  0.08477656543254852
0
fps: 6.386425337988559
TIMESTEP 18425 / STATE explore / EPSI

TIMESTEP 18471 / STATE explore / EPSILON 0.08164836999997196 / ACTION 0 / REWARD 0.1 / Q_MAX  31.277155 / Loss  0.28058481216430664
0
fps: 7.047225083211379
TIMESTEP 18472 / STATE explore / EPSILON 0.08164737099997196 / ACTION 0 / REWARD 0.1 / Q_MAX  14.3867445 / Loss  0.23189957439899445
0
fps: 7.145955007939374
TIMESTEP 18473 / STATE explore / EPSILON 0.08164637199997196 / ACTION 0 / REWARD 0.1 / Q_MAX  15.167675 / Loss  0.6395567059516907
0
fps: 6.667308869230708
TIMESTEP 18474 / STATE explore / EPSILON 0.08164537299997196 / ACTION 0 / REWARD 0.1 / Q_MAX  17.906805 / Loss  1.9494787454605103
0
fps: 6.746507962039569
TIMESTEP 18475 / STATE explore / EPSILON 0.08164437399997196 / ACTION 0 / REWARD 0.1 / Q_MAX  17.58512 / Loss  0.8233648538589478
0
fps: 7.142523372443506
TIMESTEP 18476 / STATE explore / EPSILON 0.08164337499997196 / ACTION 0 / REWARD 0.1 / Q_MAX  14.0725155 / Loss  0.21177417039871216
0
fps: 6.880623115511941
TIMESTEP 18477 / STATE explore / EPSILON 0.08164237599997196

TIMESTEP 18524 / STATE explore / EPSILON 0.08159542299997188 / ACTION 0 / REWARD 0.1 / Q_MAX  20.536816 / Loss  0.20118248462677002
0
fps: 6.828311507329239
TIMESTEP 18525 / STATE explore / EPSILON 0.08159442399997188 / ACTION 0 / REWARD 0.1 / Q_MAX  8.648792 / Loss  0.3184273838996887
0
fps: 6.824778300274989
TIMESTEP 18526 / STATE explore / EPSILON 0.08159342499997188 / ACTION 0 / REWARD 0.1 / Q_MAX  10.141216 / Loss  0.08705110847949982
0
fps: 6.8163932614581535
TIMESTEP 18527 / STATE explore / EPSILON 0.08159242599997188 / ACTION 0 / REWARD 0.1 / Q_MAX  15.764297 / Loss  0.46308431029319763
1
fps: 6.555956046704284
TIMESTEP 18528 / STATE explore / EPSILON 0.08159142699997188 / ACTION 0 / REWARD 0.1 / Q_MAX  11.414715 / Loss  3.7854671478271484
----------Random Action----------
fps: 6.953975117383346
TIMESTEP 18529 / STATE explore / EPSILON 0.08159042799997188 / ACTION 0 / REWARD 0.1 / Q_MAX  23.051748 / Loss  2.1199398040771484
1
fps: 5.931723653190439
TIMESTEP 18530 / STATE explor

TIMESTEP 18577 / STATE explore / EPSILON 0.0815424759999718 / ACTION 0 / REWARD 0.1 / Q_MAX  12.20918 / Loss  0.8107506632804871
1
fps: 7.0993996245749384
TIMESTEP 18578 / STATE explore / EPSILON 0.0815414769999718 / ACTION 0 / REWARD 0.1 / Q_MAX  3.178963 / Loss  0.24954117834568024
1
fps: 6.629654568503046
TIMESTEP 18579 / STATE explore / EPSILON 0.0815404779999718 / ACTION 0 / REWARD 0.1 / Q_MAX  9.626355 / Loss  0.7279754281044006
1
fps: 6.950886612972722
TIMESTEP 18580 / STATE explore / EPSILON 0.0815394789999718 / ACTION 0 / REWARD 0.1 / Q_MAX  14.846255 / Loss  0.48526766896247864
1
fps: 7.089379884793249
TIMESTEP 18581 / STATE explore / EPSILON 0.0815384799999718 / ACTION 0 / REWARD 0.1 / Q_MAX  33.939796 / Loss  0.3048800826072693
1
fps: 5.350157342412039
TIMESTEP 18582 / STATE explore / EPSILON 0.0815374809999718 / ACTION 0 / REWARD 0.1 / Q_MAX  9.429135 / Loss  0.15569910407066345
0
fps: 7.1855586563944955
TIMESTEP 18583 / STATE explore / EPSILON 0.0815364819999718 / ACTION 

TIMESTEP 18631 / STATE explore / EPSILON 0.08148852999997172 / ACTION 0 / REWARD 0.1 / Q_MAX  9.583448 / Loss  0.7450729608535767
0
fps: 6.994038656188041
TIMESTEP 18632 / STATE explore / EPSILON 0.08148753099997172 / ACTION 0 / REWARD 0.1 / Q_MAX  11.833639 / Loss  0.3498564660549164
0
fps: 6.853426710087075
TIMESTEP 18633 / STATE explore / EPSILON 0.08148653199997172 / ACTION 0 / REWARD 0.1 / Q_MAX  5.5644774 / Loss  0.158582866191864
0
fps: 6.7972275026294
TIMESTEP 18634 / STATE explore / EPSILON 0.08148553299997172 / ACTION 0 / REWARD 0.1 / Q_MAX  7.759956 / Loss  0.162679985165596
----------Random Action----------
fps: 4.522492215062668
TIMESTEP 18635 / STATE explore / EPSILON 0.08148453399997171 / ACTION 1 / REWARD 0.1 / Q_MAX  6.3162317 / Loss  0.31077373027801514
0
fps: 6.560662963721882
TIMESTEP 18636 / STATE explore / EPSILON 0.08148353499997171 / ACTION 0 / REWARD 0.1 / Q_MAX  26.093277 / Loss  0.5996520519256592
0
fps: 6.180691821860439
TIMESTEP 18637 / STATE explore / EPSI

TIMESTEP 18683 / STATE explore / EPSILON 0.08143658199997164 / ACTION 0 / REWARD 0.1 / Q_MAX  15.962173 / Loss  0.27895501255989075
1
fps: 6.821381872117305
TIMESTEP 18684 / STATE explore / EPSILON 0.08143558299997164 / ACTION 0 / REWARD 0.1 / Q_MAX  6.500634 / Loss  0.043022893369197845
0
fps: 7.229414725288363
TIMESTEP 18685 / STATE explore / EPSILON 0.08143458399997164 / ACTION 0 / REWARD 0.1 / Q_MAX  13.558735 / Loss  0.09425641596317291
1
fps: 5.897735006440073
TIMESTEP 18686 / STATE explore / EPSILON 0.08143358499997164 / ACTION 0 / REWARD 0.1 / Q_MAX  16.369196 / Loss  0.2025769054889679
0
fps: 6.939834309814504
TIMESTEP 18687 / STATE explore / EPSILON 0.08143258599997164 / ACTION 0 / REWARD 0.1 / Q_MAX  4.701557 / Loss  1.1918710470199585
0
fps: 7.146782988004362
TIMESTEP 18688 / STATE explore / EPSILON 0.08143158699997163 / ACTION 0 / REWARD 0.1 / Q_MAX  16.418455 / Loss  0.4276941120624542
0
fps: 6.937274645883917
TIMESTEP 18689 / STATE explore / EPSILON 0.08143058799997163 /

TIMESTEP 18736 / STATE explore / EPSILON 0.08138363499997156 / ACTION 0 / REWARD 0.1 / Q_MAX  13.4326725 / Loss  0.2977401316165924
0
fps: 5.958139785954356
TIMESTEP 18737 / STATE explore / EPSILON 0.08138263599997156 / ACTION 0 / REWARD 0.1 / Q_MAX  19.052216 / Loss  0.5167104601860046
0
fps: 7.94911720781436
TIMESTEP 18738 / STATE explore / EPSILON 0.08138163699997156 / ACTION 0 / REWARD 0.1 / Q_MAX  7.14755 / Loss  0.435971736907959
0
fps: 6.934659541823184
TIMESTEP 18739 / STATE explore / EPSILON 0.08138063799997156 / ACTION 0 / REWARD 0.1 / Q_MAX  12.174788 / Loss  0.2148403823375702
0
fps: 7.072715192925774
TIMESTEP 18740 / STATE explore / EPSILON 0.08137963899997155 / ACTION 0 / REWARD 0.1 / Q_MAX  18.042965 / Loss  0.9460672736167908
----------Random Action----------
fps: 7.288965507653381
TIMESTEP 18741 / STATE explore / EPSILON 0.08137863999997155 / ACTION 0 / REWARD 0.1 / Q_MAX  16.141024 / Loss  1.1932703256607056
0
fps: 6.921545006584397
TIMESTEP 18742 / STATE explore / EP