# Dino-Game

Playing Dino-Game of Chrome using Reinforcement Learning Algorithm.

Inspiration of Idea and Source of Learning:
https://blog.paperspace.com/dino-run/

#### Key Points:
- Selenium is used to Interface Deep Learning Model and Browser.
- Open-CV is used to Pre-Process Images
- TensorFlow for building Deep Learning Model.

## Imports

### Importing Libraries

Installing selenium

In [1]:
!pip3 install selenium

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import numpy as np
from PIL import Image
import cv2
import io
import time
import pandas as pd
import numpy as np
from IPython.display import clear_output
from random import randint
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization,Conv2D,Conv2DTranspose,LeakyReLU,Activation,Flatten,Dense,Reshape,Input,Concatenate,MaxPooling2D
from tensorflow.keras.initializers import orthogonal
from tensorflow.keras.models import Model,load_model


from collections import deque
import random
import pickle
from io import BytesIO
import base64

## Global Variables to keep track

In [3]:
game_url = 'chrome://dino'
chrome_driver_path = "chromedriver_linux64/chromedriver"
loss_filepath = "Objects/loss_dataframe.csv"
actions_filepath = "Objects/actions_dataframe.csv"
Qvalues_filepath = "Objects/Qvalues_dataframe.csv"
scores_filepath = "Objects/scores_dataframe.csv"

loss_df = pd.read_csv(loss_filepath) if os.path.isfile(loss_filepath) else pd.DataFrame(columns =['loss'])
scores_df = pd.read_csv(scores_filepath) if os.path.isfile(loss_filepath) else pd.DataFrame(columns = ['scores'])
actions_df = pd.read_csv(actions_filepath) if os.path.isfile(actions_filepath) else pd.DataFrame(columns = ['actions'])
Qvalues_df =pd.read_csv(Qvalues_filepath) if os.path.isfile(Qvalues_filepath) else pd.DataFrame(columns = ['Qvalues'])

## Script

In [4]:
#create id for canvas for faster selection from DOM
init_script = "document.getElementsByClassName('runner-canvas')[0].id = 'runner-canvas'"

#get image from canvas
getbase64Script = "canvasRunner = document.getElementById('runner-canvas'); \
return canvasRunner.toDataURL().substring(22)"

## Game Module

Interfacing between JavaScript and Python

In [5]:
class Game:
    def __init__(self):
        chrome_options = Options()
        chrome_options.add_argument("disable-infobars")
        chrome_options.add_argument("--mute-audio")
        self._driver = webdriver.Chrome(executable_path = chrome_driver_path)
        self._driver.set_window_position(x=-10,y=0)
        try:
            self._driver.get(game_url)
        except:
            pass
        self._driver.execute_script("Runner.config.ACCELERATION=0")
        self._driver.execute_script(init_script)
    def get_playing(self):
        return self._driver.execute_script("return Runner.instance_.playing")
    def get_crashed(self):
        return self._driver.execute_script("return Runner.instance_.crashed")
    def restart(self):
        self._driver.execute_script("Runner.instance_.restart()")
    def press_up(self):
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_UP)
    def press_down(self):
        self._driver.find_element_by_tag_name("body").send_keys(Keys.ARROW_DOWN)
    def get_score(self):
        score_array = self._driver.execute_script("return Runner.instance_.distanceMeter.digits")
        score = ''.join(score_array)
        return int(score)
    def pause(self):
        return self._driver.execute_script("return Runner.instance_.stop()")
    def resume(self):
        return self._driver.execute_script("return Runner.instance_.play()")
    def end(self):
        return self._driver.close()

## DinoAgent

Actions of DinoAgent

In [6]:
class DinoAgent:
    def __init__(self,game):
        self._game = game
        self.jump()
    def is_running(self):
        return self._game.get_playing()
    def is_crashed(self):
        return self._game.get_crashed()
    def jump(self):
        self._game.press_up()
    def duck(self):
        self._game.press_down()

## Game State Module or Environment for Agent

This Module send actions to agents and changes state of Environment as per action. It decides the Reward for the Agent and returns Experience.

In [7]:
class Game_State:
    def __init__(self,agent,game):
        self._agent = agent
        self._game = game
        self._display = ShowImage()
        self._display.__next__()
    def get_state(self,action):
        score = self._game.get_score() 
        reward = 0.2
        isOver = False

        if action[1] == 1:
            actions_df.loc[len(actions_df)] = action[1]
            self._agent.jump()

        elif action[2] == 1:
            actions_df.loc[len(actions_df)] = action[2]
            self._agent.duck()

        image = grab_screen(self._game._driver) 
        #self._display.send(image)
       
        if self._agent.is_crashed():
            scores_df.loc[len(scores_df)] = score
            self._game.restart()
            reward = -1
            isOver = True
            
        return image, reward, isOver

## Extracting Image and Preprocessing

In [8]:
def PreProcessImage(Image):
    img = cv2.cvtColor(Image, cv2.COLOR_BGR2GRAY)
    img = img[:300,:500]
    img = cv2.resize(img,(80,80))
    return img
    
def grab_screen(_driver):
    image_b64 = _driver.execute_script(getbase64Script)
    screen = np.array(Image.open(BytesIO(base64.b64decode(image_b64))))
    image = PreProcessImage(screen)#processing image as required
    return image

def ShowImage(graphs = False):
    while True:
        screen  = (yield)
        window_title = "Logs" if graphs else "Game_Play"
        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
        imageScreen = cv2.resize(screen, (800,400))
        cv2.imshow(window_title, screen)
        if (cv2.waitKey(1) & 0xFF == ord('q')):
            cv2.destroyAllWindows()
            break

## Objects

In [9]:
def SaveObject(obj, name):
    with open('Objects/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def LoadObject(name):
    with open('Objects/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

## Parameters

**Actions:**
- No Jump or Duck
- Jump
- Duck

**Epsilon:**
Epsilon is used when we are selecting specific actions base on the Q values we already have. As an example if we select pure greedy method ( epsilon = 0 ) then we are always selecting the highest q value among the all the q values for a specific state. This causes issue in exploration as we can get stuck easily at a local optima.

**Gamma:** Decay Rate of Observations\
**Observations:** Time Steps before Training\
**Explore:** Frames over which to Anneal Epilson\
**Initial_Epsilon:** Initial value of Epsilon\
**Final_Epsilon:** Final value of Epsilon\
**Replay_Memeory:** Transitions to Remember\
**Frame_per_Action:** No.of Frames for Action

Image Dimensions are (80,80,4) as 4-Frames are Stacked Together.

In [10]:
Actions = 3
Gamma = 0.99
Observation = 100
Explore = 1000
Final_Epsilon = 0.0001
Initial_Epsilon = 0.1
Replay_Memory = 50000
Batch_Size = 32
Frame_per_Action = 1
LearningRate = 1e-3

### Initialise Cache


In [11]:
def init_cache():
    SaveObject(Initial_Epsilon,"Epsilon")
    t = 0
    SaveObject(t,"Time")
    D = deque()
    SaveObject(D,"D")

# Initilise Cache only once
init_cache()

## Deep Learning Model

In [12]:
def BuildModel():
    tf.keras.backend.clear_session()

    model = Sequential()
    model.add(Conv2D(32, (8, 8), padding='same',strides=(4, 4),input_shape=(80,80,4)))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (4, 4),strides=(2, 2),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Conv2D(64, (3, 3),strides=(1, 1),  padding='same'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(Actions))
    adam = Adam(lr=LearningRate)
    model.compile(loss='mse',optimizer=adam)
    
    if not os.path.isfile(os.getcwd() + '/Model/RLModel.h5'):
        print ('Weights Saved')
        model.save('Model/RLModel.h5')
    
    return model

RLModel = BuildModel()

## Training the Model

In [13]:
def TrainModel(model,Game_State,ObservePerformance):
    lastTime = time.time()
    D = LoadObject("D")

    Action = np.zeros([Actions])
    Action[0] = 1

    xt, r0, termination = Game_State.get_state(Action)

    st = np.stack((xt, xt, xt, xt), axis=2)
    st = st.reshape(1,st.shape[0],st.shape[1],st.shape[2])
    Inital_State = st

    if ObservePerformance:
        Observe = 999999999
        epsilon = Final_Epsilon
        model.load_weights('Model/RLModel.h5')
        model.compile(loss='mse',optimizer=Adam(learning_rate=LearningRate))
        print ("Weights of Model are Loaded")
    else:
        Observe = Observation
        epsilon = LoadObject("Epsilon")
        model.load_weights('Model/RLModel.h5')
        model.compile(loss='mse',optimizer=Adam(learning_rate=LearningRate))

    t = LoadObject('Time')

    while(True):
        loss = 0
        Q_sa = 0
        ActionIndex = 0
        rt = 0
        at = np.zeros([Actions])

        if t%Frame_per_Action == 0:
            # Exploring a Action Randomly
            if random.random() <= epsilon:
                print ("Performing Random Action")
                ActionIndex = random.randrange(Actions)
                at[ActionIndex] = 1
            else:
                q = model.predict(st)
                # Index with Maximum Value is ActionIndex
                ActionIndex = maxQ = np.argmax(q)
                at[ActionIndex] = 1

        if epsilon > Final_Epsilon and t > Observe:
            epsilon -= (Initial_Epsilon - Final_Epsilon)/(Explore)

        xt1, rt, termination = Game_State.get_state(at)
        print ('fps: {0}'.format(1 / (time.time() - lastTime)))

        LastTime = time.time()

        xt1 = xt1.reshape(1,xt1.shape[0],xt1.shape[1],1)
        st1 = np.append(xt1,st[:,:,:,:3],axis=3)

        D.append((st,ActionIndex,rt,st1,termination))

        if len(D) > Replay_Memory:
            D.popleft()

        #Training after Observation
        if t > Observe:
            miniBatch = random.sample(D,Batch_Size)
            inputs = np.zeros((Batch_Size,80,80,4))
            targets = np.zeros((Batch_Size,Actions))

            for i in range(len(miniBatch)):
                state_t, action_t, reward_t,state_t1, termination = miniBatch[i]

                inputs[i:i+1] = state_t
                targets[i] = model.predict(state_t)

                Q_sa = model.predict(state_t1)

                if termination:
                    targets[i:action_t] = reward_t
                else:
                    targets[i:action_t] = reward_t + Gamma * np.max(Q_sa)

            loss += model.train_on_batch(inputs,targets)
            loss_df.loc[len(loss_df)] = loss
            Qvalues_df.loc[len(loss_df)] = np.max(Q_sa)

        st = Inital_State if termination else st1
        t += 1

        if t%1000 == 0:
            Game_State._game.pause()
            model.save_weights('RLModel.h5',overwrite=True)
            SaveObject(D,"D")
            SaveObject(T,"Time")
            SaveObject(epsilon,"Epsilon")

            loss_df.to_csv("./Objects/loss_dataframe.csv",index=False)
            scores_df.to_csv("./Objects/scores_dataframe.csv",index=False)
            actions_df.to_csv("./Objects/actions_dataframe.csv",index=False)
            Qvalues_df.to_csv("./Objects/Qvalues_dataframe.csv",index=False)

            clear_output()
            print ('Model Weights Saved')
            Game_State._game.resume()

        state = ""
        if t<= Observe:
            state = "Observating"
        elif t>Observe and t<=Observe+Explore:
            state = 'Explorating'
        else:
            state = 'Training'

        print ("Time-Step:",t, "/ State", state)
    print ("Episode Finished")

## Start Playing

In [14]:
def playGame(ObservePerformance):
    game = Game()
    dino = DinoAgent(game)
    game_state = Game_State(dino,game)    
    model = BuildModel()
    try:
        TrainModel(model,game_state,ObservePerformance)
    except StopIteration:
        game.end()

In [15]:
playGame(False)

fps: 2.2134289566660597
Time-Step: 1 / State Observating
Performing Random Action
fps: 1.820899669361785
Time-Step: 2 / State Observating
Performing Random Action
fps: 1.602499626530794
Time-Step: 3 / State Observating
fps: 1.4570780234088292
Time-Step: 4 / State Observating
fps: 1.3403359800440546
Time-Step: 5 / State Observating
fps: 1.2460851726909061
Time-Step: 6 / State Observating
fps: 1.1521407828275891
Time-Step: 7 / State Observating
Performing Random Action
fps: 1.0656744806229754
Time-Step: 8 / State Observating
fps: 1.0040460554191337
Time-Step: 9 / State Observating
fps: 0.9417722041957244
Time-Step: 10 / State Observating
fps: 0.8891259342671094
Time-Step: 11 / State Observating
fps: 0.8429867141738525
Time-Step: 12 / State Observating
fps: 0.8029378426811509
Time-Step: 13 / State Observating
fps: 0.7660406677111143
Time-Step: 14 / State Observating
fps: 0.7325100149600048
Time-Step: 15 / State Observating
Performing Random Action
fps: 0.6955631132169966
Time-Step: 16 / S

WebDriverException: Message: chrome not reachable
  (Session info: chrome=85.0.4183.83)
