# Welcome to simple RL (AI) T-Rex (Chrome Dino) projects
###### Autors: MK & MP

In [1]:
# Uncomment this line to install proper package if not installed
# !pip3 install selenium
# !pip3 install --upgrade pip
# !pip3 install --upgrade setuptools
# !pip install line_profiler
# !pip install mss
# !pip install opencv-python
# !pip install numpy
# !pip install matplotlib
# !pip install image
# !pip install sklearn

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import os
import time
import random
import json
import IPython
import PIL
from PIL import Image
from mss.darwin import MSS as mss
import mss.tools
import numpy as np
from sklearn.utils import shuffle

In [2]:
roi = {
    "top": 0, 
    "left": 0, 
    "width": 500, 
    "height": 300,
    'top_i': 130,
    'left_i': 20,
    'width_i': -40,
    'height_i': -150
}

In [3]:
def run_dino_game(CHROMEDRIVER_PATH):
    # You must install CHROMEDRIVER from https://chromedriver.storage.googleapis.com/index.html?path=73.0.3683.68/
    # Then you must unzip proper verison on set path to it below
    # For now I deliver chromedriver for chrome 73 near to this file
    
    CHROMEDRIVER_PATH = CHROMEDRIVER_PATH
    
    # Check path to chromedriver
    if CHROMEDRIVER_PATH is None:
        print("Set path to CHROMEDRIVER!!!!")
        raise KeyboardInterrupt
    elif not os.path.isfile(CHROMEDRIVER_PATH):
        print("I do not see file under the path!!!")
        raise KeyboardInterrupt
    
    # Initialize chromedriver
    options = Options()
#     options.add_argument("--headless") # Runs Chrome in headless mode. (No window show)
    options.add_argument('--no-sandbox') # Bypass OS security model
    options.add_argument('--hide-scrollbars')
    options.add_argument('disable-infobars')
    options.add_argument('--disable-gpu')
    options.add_argument('--disable-extensions')
    options.add_argument('--window-position={},{}'.format(roi['top'],roi['left']))
    options.add_argument('--window-size={},{}'.format(roi['width'],roi['height']))
    options.add_argument('--no-proxy-server')
    driver = webdriver.Chrome(options=options, executable_path=CHROMEDRIVER_PATH)
    # go to proper URL adress for game
    driver.get("chrome://dino")
    return driver

In [4]:
def get_game_info(driver):
    runner = driver.execute_script("""return {
                                   crashed: Runner().crashed,
                                   playCount: Runner().playCount,
                                   tRex_status: Runner().tRex.status,
                                   runningTime: Runner().runningTime,
                                   obstacles: Runner().horizon.obstacles
                                   }""")

    to_return =  {
        'crashed':runner['crashed'],
        'runningTime':runner['runningTime'],
        'playCount':runner['playCount'],
        'tRex_status':runner['tRex_status'],
        'obstacles': []
    }
    
    for idx,ob in enumerate(runner['obstacles']):
        to_return['obstacles'].append({
            'type':ob['typeConfig']['type'],
            'xPos':ob['xPos'],
            'yPos':150 - ob['yPos'],
            'width':ob['width']
        })

#     with mss.mss() as sct:
#         res = driver.get_window_rect()
#         monit = {
#             "top": res['y']+roi['top_i'], 
#             "left": res['x']+roi['left_i'], 
#             "width": res['width']+roi['width_i'], 
#             "height": res['height']+roi['height_i']
#         }
#         sct_img = sct.grab(monit)
#         to_return['screen'] = np.array(sct_img)
    return to_return

In [5]:
# https://godoc.org/github.com/unixpickle/muniverse/chrome
def dispatch_key_event(driver, name, options = {}):
    options["type"] = name
    body = json.dumps({'cmd': 'Input.dispatchKeyEvent', 'params': options})
    resource = "/session/%s/chromium/send_command" % driver.session_id
    url = driver.command_executor._url + resource
    driver.command_executor._request('POST', url, body)

In [6]:
def press_key_up(driver):
    options = { \
    "code": "ArrowUp",
    "key": "ArrowUp",
    "text": "",
    "unmodifiedText": "",
    "nativeVirtualKeyCode": 38,
    "windowsVirtualKeyCode": 38
    }
    dispatch_key_event(driver, "rawKeyDown", options)
    dispatch_key_event(driver, "char", options)
    dispatch_key_event(driver, "keyUp", options)

In [7]:
def hold_key_down(driver):
    options = { \
    "code": "ArrowDown",
    "key": "ArrowDown",
    "text": "",
    "unmodifiedText": "",
    "nativeVirtualKeyCode": 40,
    "windowsVirtualKeyCode": 40
    }
    dispatch_key_event(driver, "rawKeyDown", options)
    dispatch_key_event(driver, "char", options)

In [8]:
def release_key(driver):
    options_up = { \
    "code": "ArrowUp",
    "key": "ArrowUp",
    "text": "",
    "unmodifiedText": "",
    "nativeVirtualKeyCode": 38,
    "windowsVirtualKeyCode": 38
    }
    options_down = { \
    "code": "ArrowDown",
    "key": "ArrowDown",
    "text": "",
    "unmodifiedText": "",
    "nativeVirtualKeyCode": 40,
    "windowsVirtualKeyCode": 40
    }
    dispatch_key_event(driver, "keyUp", options_up)
    dispatch_key_event(driver, "keyUp", options_down)

In [9]:
def do_action(driver, action):
    if action == 'n':
        release_key(driver)
    if action == 'r':
        release_key(driver)
        driver.execute_script('Runner().restart()')
    if action == 'j':
        release_key(driver)
        press_key_up(driver)
    if action == 'd':
        hold_key_down(driver)

In [10]:
last_frame_to_fps = 10
last_frame_idx = 0
list_to_calc_fps = [1]*last_frame_to_fps

def print_info_about_game(last_trex_status,game_data,last_time):
    global last_frame_idx
    global list_to_calc_fps
    
    last_frame_idx += 1
    if last_frame_idx == last_frame_to_fps:
        last_frame_idx = 0
    list_to_calc_fps[last_frame_idx] = time.time() - last_time
    
    avg_fps = (1/np.mean(list_to_calc_fps))
    #print('{:10.2f} fps:{:3.1f} {}'.format(game_data['runningTime'],avg_fps,game_data['tRex_status']))
    if game_data['tRex_status'] != last_trex_status:
        #print('{:10.2f} fps:{:3.1f} {}'.format(game_data['runningTime'],avg_fps,game_data['tRex_status']))
        #print(game_data['obstacles'])
        last_trex_status = game_data['tRex_status']
    return last_trex_status

In [11]:
# dictionary mapping actions to integers
action_dict = {
    0: 'n',
    1: 'j',
    2: 'd'
}

# dictionary mapping action integers to one hot vectors
action_input_dict = {
    0: [1, -1, -1],
    1: [-1, 1, -1],
    2: [-1, -1, 1]
}

# dictionary mamping obstacle type to one hot vectors
# obstacles_dict = {
#     'CACTUS_LARGE': [1, -1, -1], 
#     'CACTUS_SMALL': [-1, 1, -1],
#     'PTERODACTYL': [-1, -1, 1]
# }

In [77]:
# function which receives game_data dictionary and returns the state (np.array of neural network input data)
# parameters "max_{}" are needed to normalize the data (scale from -1 to 1)
def get_state(game_data, max_x=500, max_y=105, max_w=80, max_runningTime=100000):
    
    # our dino will see only first three obstacles stored in game_data dict
    # here we define a dictionary which describe the obstacles
    # the dictionary has three main keys [0, 1, 2], each of them is responsible for different obstacle
    # nested keys provide information about specific parameters
    obstacles = {}
    obstacles[0] = {}
    obstacles[1] = {}
    obstacles[2] = {}
    # position x, position y, width
    obstacles[0]['x'] = [0]
    obstacles[0]['y'] = [0]
    obstacles[0]['w'] = [0]
    # one-hot encoded class of obstacle (small cactus, large cactus, pterodactyl)
    #obstacles[0]['type'] = [0, 0, 0]
    # marker if obstacle exists (sometimes there are no obstacles)
    obstacles[0]['is_obst'] = [-1]
    
    obstacles[1]['x'] = [0]
    obstacles[1]['y'] = [0]
    obstacles[1]['w'] = [0]
    #obstacles[1]['type'] = [0, 0, 0]
    obstacles[1]['is_obst'] = [-1]
    
    obstacles[2]['x'] = [0]
    obstacles[2]['y'] = [0]
    obstacles[2]['w'] = [0]
    #obstacles[2]['type'] = [0, 0, 0]
    obstacles[2]['is_obst'] = [-1]
    
    # iterate over obstacles in game_data  
    for i, obstacle in enumerate(game_data['obstacles']):
        obstacles[i]['x'] = [(obstacle['xPos'] / max_x * 2) - 1.]
        obstacles[i]['y'] = [(obstacle['yPos'] / max_y * 2) - 1.]
        obstacles[i]['w'] = [(obstacle['width'] / max_w * 2) - 1.]
        obstacles[i]['type'] = obstacles_dict[obstacle['type']]
        obstacles[i]['is_obst'] = [1]
      
    
    runningtime = [(game_data['runningTime'] / max_runningTime / 2) - 1.]
    input_data = np.concatenate([
        obstacles[0]['x'], obstacles[0]['y'], obstacles[0]['w'], obstacles[0]['is_obst'], 
        obstacles[1]['x'], obstacles[1]['y'], obstacles[1]['w'], obstacles[1]['is_obst'],
        obstacles[2]['x'], obstacles[2]['y'], obstacles[2]['w'], obstacles[2]['is_obst'],
        runningtime]).reshape(-1, 13)
    
    return input_data

In [96]:
def change_decision_to_bad_one(history_decisions):
    # using this methods allows to convert a given decision to a "bad decision"
    # if dino dies, last decisions's marker "if dino survived after taking a given decision?" is changed to -1
    history_decisions[-1:, -1] = -1
    # this function also converts decision array (i.e. [0.2 0.7 0.1]) to one hot array ([0 1 0])
    # it allows to emphasise which action was bad
    dec_1h = np.zeros(3)
    dec_1h[np.argmax(history_decisions[-1:, :3])] = 1
    history_decisions[-1:, :3] = dec_1h
    
    return history_decisions

In [97]:
def main_loop(driver,show_images):
    last_trex_status = None
    last_time = time.time()
    frame_iter = 0
    
    # placeholders for data history
    # history_data accumulates input_data returned by function get_state()
    history_data = np.empty((0, 13), np.float32)
    # history_decisions stores actions taken for a given input data
    history_decisions = np.empty((0, 4), np.float32)
    # history_weights stores weights of importance for all actions
    history_weights = np.empty((0, 1), np.float32)
    # iterator which holds number of actions done in the last game
    # when dino dies, this iterator is used to change all the weights from the last game, then it's reseted
    # i.e. dino dies -> history_weights[-last_game_number_of_actions:] *= score_from_last_game
    # it allows us to increase weights values if score was high
    last_game_number_of_actions = 0
    
    while True:
        game_data = get_game_info(driver)
        frame_iter += 1
        
        last_trex_status = print_info_about_game(last_trex_status,game_data,last_time)
        last_time = time.time()
        
        if game_data['tRex_status'] == 'WAITING':
            # Here tRex wait for start
            do_action(driver,'j')

        if ((show_images) and (frame_iter % 100 == 0)):
            IPython.display.display(PIL.Image.fromarray(game_data['screen']))

        if game_data['tRex_status'] in ['RUNNING', 'DUCKING']:
                
            # retrieving input_data with get_state()
            input_data = get_state(game_data)
            # model prediction (3 elements array of probabilities)
            pred = model.predict(input_data)
            # converting probabilities to decision (i.e. [0.2 0.7 0.1] -> 1)
            decision = np.argmax(pred)
            do_action(driver, action_dict[decision])
            # concatenating predictions array with marker "did dino survived after taking a given decision?"
            pred = np.concatenate([pred, [[1]]], axis=-1)
            # appending input_data to history_data array
            history_data = np.append(history_data, input_data, axis=0)
            # appending decision to history_decisions array
            history_decisions = np.append(history_decisions, pred, axis=0)
            # appending weights to history_weights array
            # decisions of jumping have bigger weights than running/ducking
            if action_dict[decision] == 'j':
                history_weights = np.append(history_weights, 0.025)
            else:
                history_weights = np.append(history_weights, 0.001)
            
            # incrementing last_game_number_of_actions value
            last_game_number_of_actions += 1

        # end process when dino died
        if game_data['crashed']:
            # prints to check out whats going inside
            print(pred)
            # first obstacle
            print(input_data[0][:4])
            # 2nd obstacle
            print(input_data[0][4:8])
            # 3rd obstacle
            print(input_data[0][8:12])
            # running time value converted to (-1, 1) range
            print(input_data[0][12])
            
            # converting the last decision in history_decisions to bad decision
            history_decisions = change_decision_to_bad_one(history_decisions)
            
            # multiplying the weights of the last game by a runningTime value
            history_weights[-last_game_number_of_actions:] *= (game_data['runningTime'] / 5000)
            # changing weight of the last bad decision
            history_weights[-1:] = 1.
            
            # resetting last_game_number_of_actions
            last_game_number_of_actions = 0
            
            print('retraining model...')
            print(history_data.shape, history_decisions.shape)
    
            # training the model
            # we will use only last 150k examples, epochs and batch size aren't adjusted
            model.fit(history_data[-150000:], 
                      history_decisions[-150000:], epochs=2, batch_size=128, 
                      sample_weight=history_weights[-150000:], shuffle=True)
            
            # also saving the model's weights after each game
            model.save_weights('model_dino.h5')
            time.sleep(0.3)
            # print("You died!!!!")
            do_action(driver,'r')


In [98]:
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout
from keras.optimizers import Adam, SGD


model = Sequential()
model.add(Dense(256, input_shape=(13,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

adam = Adam()
sgd = SGD(momentum=0.9)
model.compile(adam, loss='categorical_crossentropy')

In [99]:
def main():
    try:
        driver = run_dino_game(CHROMEDRIVER_PATH='./chromedriver')
        time.sleep(1.0)
        show_images = False
        main_loop(driver,show_images)
    finally:
        driver.quit()

In [100]:
# model.load_weights('models/500.h5')

In [101]:
# %lprun -f get_game_info main()
main()

[[0.30238634 0.3308889  0.36672476 1.        ]]
[-0.728      -0.14285714 -0.15        1.        ]
[ 0.  0.  0. -1.]
[ 0.  0.  0. -1.]
-0.9786693250000098
retraining model...
(549, 13) (549, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[0.29458857 0.35279724 0.35261419 1.        ]]
[-0.136      -0.14285714  0.275       1.        ]
[ 0.  0.  0. -1.]
[ 0.  0.  0. -1.]
-0.9809595249999984
retraining model...
(1061, 13) (1061, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[0.38829893 0.28970259 0.32199848 1.        ]]
[-0.732       0.14285714 -0.375       1.        ]
[ 0.616      -0.14285714  0.275       1.        ]
[ 0.  0.  0. -1.]
-0.9783949500000017
retraining model...
(1647, 13) (1647, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[0.37141669 0.28318691 0.34539637 1.        ]]
[-0.736       0.14285714 -0.375       1.        ]
[ 

Epoch 2/2
[[0.37973055 0.40387839 0.21639113 1.        ]]
[-0.228       0.14285714 -0.375       1.        ]
[ 0.  0.  0. -1.]
[ 0.  0.  0. -1.]
-0.9804310000000259
retraining model...
(6376, 13) (6376, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[9.57493246e-01 4.09671941e-07 4.25062552e-02 1.00000000e+00]]
[-0.744      -0.14285714  0.275       1.        ]
[ 0.  0.  0. -1.]
[ 0.  0.  0. -1.]
-0.9784459249999782
retraining model...
(6959, 13) (6959, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[7.58080721e-01 8.29475510e-09 2.41919294e-01 1.00000000e+00]]
[-0.748      -0.14285714  0.275       1.        ]
[ 0.  0.  0. -1.]
[ 0.  0.  0. -1.]
-0.9679496500000095
retraining model...
(7576, 13) (7576, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[1.41626671e-01 1.14809950e-09 8.58373344e-01 1.00000000e+00]]
[-0.724      -0.14285714

Epoch 2/2
[[9.76387591e-13 2.26564542e-15 1.00000000e+00 1.00000000e+00]]
[-0.668      -0.14285714  0.275       1.        ]
[ 0.  0.  0. -1.]
[ 0.  0.  0. -1.]
-0.9736102249999385
retraining model...
(12685, 13) (12685, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2
[[3.20206278e-23 1.00000000e+00 1.86416483e-17 1.00000000e+00]]
[-0.004      -0.14285714 -0.575       1.        ]
[ 0.816      -0.14285714 -0.575       1.        ]
[ 0.  0.  0. -1.]
-0.9637652499999967
retraining model...
(13373, 13) (13373, 4)
Be aware that dense_48 doesn't have expected shape (3,). It has shape (4,)
Epoch 1/2
Epoch 2/2


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/mateuszpoltorak/anaconda3/envs/dino/lib/python3.6/site-packages/urllib3/connectionpool.py", line 377, in _make_request
    httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/mateuszpoltorak/anaconda3/envs/dino/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-101-7ca7a05ac73f>", line 2, in <module>
    main()
  File "<ipython-input-99-a819e3fc5f56>", line 6, in main
    main_loop(driver,show_images)
  File "<ipython-input-97-64899d271955>", line 20, in main_loop
    game_data = get_game_info(driver)
  File "<ipython-input-4-fd0503540337>", line 8, in get_game_info
    }""")
  File "/Users/mateuszpoltorak/anaconda3/envs/dino/lib/python3.

TypeError: must be str, not list