In [26]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense
import tensorflow_probability as tfp
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym import Env
from gym.spaces import Box, Discrete, Dict
from mss import mss
import pytesseract
import cv2
import sys
import os
import pygame as pg
import random
from gym.envs.registration import register
from setuptools import setup
!{sys.executable} --version

Python 3.7.13


In [27]:
class Sprite():
    """Base class for every scrolling, drawable object in the game.

    A sprite registers itself in ``game.all_sprites`` on construction, moves
    left by ``game.game_speed`` on every update and re-spawns to the right of
    the screen once it has scrolled far enough off the left edge.

    Args:
        img: a single image, or a list/tuple of animation frames. Each image
            must provide ``get_rect()``; when a sequence is given, the first
            frame determines the initial width and height.
        pos: initial ``(x, y)`` position of the top-left corner.
        game: owning game object; must expose ``all_sprites``, ``obstacles``
            and ``game_speed``.
    """

    def __init__(self, img, pos, game):
        self.position = np.array(pos, dtype=float)
        self.image = img
        self.game = game
        # Animated sprites hand over a sequence of frames. Accept tuples and
        # list subclasses as well as plain lists; the first frame sets the size.
        if isinstance(self.image, (list, tuple)):
            first_frame = self.image[0]
        else:
            first_frame = self.image
        rect = first_frame.get_rect()
        self.height = rect[3]
        self.width = rect[2]
        self.game.all_sprites.append(self)

    def update(self):
        """Scroll left by the current game speed and refresh the cached size.

        ``self.image`` must be a single image (not a frame sequence) by the
        time this runs, because its rect is queried here.
        """
        self.position[0] -= self.game.game_speed
        if self.position[0] < -150:
            self.reset()
        rect = self.image.get_rect()
        self.height = rect[3]
        self.width = rect[2]

    def get_obs(self):
        """Return the sprite rectangle as a flat array ``[x, y, width, height]``."""
        return np.append([self.position], [self.width, self.height])

    def draw(self, screen):
        """Blit the current image onto ``screen`` at the sprite position."""
        screen.blit(self.image, self.position)

    def reset(self):
        """Re-spawn to the right of the screen, behind the right-most obstacle.

        The gap to the right-most obstacle is drawn from 20 to 60 frames of
        travel at the current speed so that the game stays playable, and the
        new x position is never left of x = 1100.
        """
        min_dist = self.game.game_speed * 20
        max_dist = self.game.game_speed * 60
        # Right-most obstacle x position, floored at 0 as in the original scan.
        max_x_pos = max([0] + [obstacle.position[0] for obstacle in self.game.obstacles])
        self.position[0] = max(max_x_pos + random.randint(min_dist, max_dist), 1100)

In [28]:
class Background(Sprite):
    """Scrolling ground strip made of two copies of the same image.

    Constructing a background with ``secondary=True`` (the default) also
    creates a companion tile one image-width to the right, built with
    ``secondary=False`` so that it does not spawn further tiles.
    """

    def __init__(self, img, pos, game, secondary=True):
        super().__init__(img, pos, game)
        self.secondary = secondary
        if secondary:
            # The companion registers itself in game.all_sprites through
            # Sprite.__init__, so no reference needs to be kept here.
            Background(self.image, (self.width, self.game.ground_height-15), self.game, secondary=False)

    def update(self):
        """Scroll left and wrap around shortly before leaving the screen."""
        self.position[0] -= self.game.game_speed
        # 20 px of overlap before wrapping.
        if self.position[0] <= 20 - self.width:
            self.small_reset()
        rect = self.image.get_rect()
        self.height = rect[3]
        self.width = rect[2]

    def small_reset(self):
        """Jump one image-width to the right (wrap-around while scrolling)."""
        self.position[0] = self.width

    def reset(self):
        """Episode reset: x = image width when ``secondary`` is set, else x = 0."""
        self.position[0] = self.width if self.secondary else 0

In [29]:
class Cloud(Sprite):
    """Decorative cloud that drifts left and re-spawns far to the right."""

    def __init__(self, img, pos, game):
        super().__init__(img, pos, game)
        self.width = self.image.get_width()

    def reset(self):
        """Re-spawn only once the cloud has fully left the screen on the left."""
        off_screen = self.position[0] < -self.width
        if not off_screen:
            return
        # x is drawn first, then y, keeping the original random-call order.
        self.position[0] = 1100 + random.randint(2500,3000)
        self.position[1] = random.randint(50,100)

In [30]:
class Cactus(Sprite):
    """Ground obstacle that shows a randomly chosen cactus image.

    ``img`` is a sequence of candidate images; one is picked at construction
    and a new one is picked on every reset.
    """

    def __init__(self, img, pos, game):
        super().__init__(img, pos, game)
        self._stand_on_ground()
        self.game.obstacles.append(self)
        # Sprite.__init__ stored the whole sequence in self.image; keep it as
        # the pool of candidates and show one concrete image.
        self.imgs = self.image
        self.image = random.choice(self.imgs)

    def _stand_on_ground(self):
        """Place the sprite so that its bottom edge sits on the ground line."""
        self.position[1] = self.game.ground_height - self.height

    def update(self):
        """Scroll left, then re-align with the ground using the refreshed height."""
        super().update()
        self._stand_on_ground()

    def reset(self):
        """Re-spawn to the right and switch to a new random cactus image."""
        super().reset()
        self.image = random.choice(self.imgs)
    

In [31]:
class Bird(Sprite):
    """Flying obstacle with a two-frame wing animation.

    ``img`` is a sequence of two frames. The bird flies ``elevation`` pixels
    above the ground line; the elevation is re-drawn on every reset.
    """

    def __init__(self, img, pos, game):
        super().__init__(img, pos, game)
        self.game.obstacles.append(self)
        self.imgs = self.image
        self.image = self.imgs[0]
        self.step_index = 0
        self.elevation = random.randint(100,250)

    def update(self):
        """Hold the flying height, scroll left and advance the wing animation."""
        self.position[1] = self.game.ground_height - self.elevation
        super().update()
        # NOTE(review): the counter wraps at 9, so frame 0 is shown for five
        # ticks and frame 1 for four; Dino wraps at 10 - confirm intended.
        tick = 0 if self.step_index >= 9 else self.step_index
        self.image = self.imgs[tick // 5]
        self.step_index = tick + 1

    def reset(self):
        """Re-spawn to the right and draw a new flying height."""
        super().reset()
        # NOTE(review): range differs from the (100, 250) used in __init__.
        self.elevation = random.randint(50,200)

In [32]:
class Dino(Sprite):
    """Player character that can run, jump and duck.

    ``img`` must hold five frames in the order run1, run2, jump, duck1, duck2
    (``imgs[:2]`` animates running, ``imgs[2]`` is the jump pose and
    ``imgs[3:5]`` animates ducking).

    ``state`` is a boolean vector ``[running, jumping, ducking]`` describing
    the action applied on the previous tick; ``new_state`` is the action
    requested on the current tick.
    """

    # Upward velocity every jump starts with. jump() re-arms the velocity with
    # this value whenever the dino touches the ground, which always happens on
    # the tick a jump is requested, so no jump ever starts with another value.
    JUMP_VELOCITY = 10

    def __init__(self, img, pos, game):
        super(Dino, self).__init__(img, pos, game)
        self.imgs = img
        self.duck_offset = np.array([0,33], dtype=float)
        self.jump_vel = self.JUMP_VELOCITY

        # running, jumping, ducking
        self.state = np.array([True, False, False], dtype=bool)
        self.new_state = self.state
        self.step_index = 0

        self.position[1] = self.game.ground_height - self.height
        self.image = self.imgs[0]

    def update(self):
        """The dino does not scroll; its movement is driven by act()/take_action()."""
        pass

    def reset(self):
        """Put the dino back on the ground in its initial running state.

        Besides position and image, the action state, the jump velocity and
        the animation counter are reset as well. Otherwise a crash while
        jumping or ducking would leak into the next episode: the dino would
        finish the old jump from the ground, or duck without the position
        offset being applied.
        """
        self.position = np.array([10,self.game.ground_height-self.height], dtype=float)
        self.image = self.imgs[0]
        self.state = np.array([True, False, False], dtype=bool)
        self.new_state = self.state
        self.step_index = 0
        self.jump_vel = self.JUMP_VELOCITY

    def act(self, user_input):
        """Apply one tick of keyboard control.

        Args:
            user_input: pygame key-state sequence (``pg.key.get_pressed()``);
                UP requests a jump, DOWN a duck, anything else keeps running.
        """
        if self.step_index >= 10:
            self.step_index = 0

        actions = [self.run, self.jump, self.duck]

        if user_input[pg.K_UP]:
            self.new_state = np.array([False, True, False])
        elif user_input[pg.K_DOWN]:
            self.new_state = np.array([False, False, True])
        else:
            self.new_state = np.array([True, False, False])

        # unless the player is still in the air (jumping), apply new action
        if not self.state[1]:
            for state, t_value in enumerate(self.new_state):
                if t_value: actions[state]()
            self.state = self.new_state
        else:
            # mid-air: the requested action is ignored and the jump continues
            actions[1]()

    def take_action(self, choice):
        """Apply one tick of agent control.

        Args:
            choice: action index, 0 = run, 1 = jump, 2 = duck.
        """
        if self.step_index >= 10:
            self.step_index = 0

        actions = [self.run, self.jump, self.duck]
        self.new_state = np.zeros(3, dtype=bool)
        self.new_state[choice] = True

        # unless the player is still in the air (jumping), apply new action
        if not self.state[1]:
            actions[choice]()
            self.state = self.new_state
        else:
            # mid-air: the requested action is ignored and the jump continues
            actions[1]()

    def run(self):
        """Advance the running animation and keep the dino on the ground."""
        if not self.state[1] or self.new_state[2] or self.state[2]:
            self.image = self.imgs[:2][self.step_index // 5]
            self.step_index += 1
            self.position[1] = self.game.ground_height - self.height

    def jump(self):
        """Advance the jump by one tick.

        On the tick a jump is requested ``state[1]`` is still False, so only
        the image changes and the velocity is re-armed; the actual movement
        starts on the following tick.
        """
        self.image = self.imgs[2]
        if self.state[1]:
            self.position[1] -= self.jump_vel * 4
            self.jump_vel -= 0.8
        # touching (or below) the ground line: land and re-arm the velocity
        if self.position[1] + self.height >= self.game.ground_height:
            self.state[1] = False
            self.jump_vel = self.JUMP_VELOCITY
            self.position[1] = self.game.ground_height - self.height

    def duck(self):
        """Advance the ducking animation and lower the dino when a duck starts."""
        if not self.state[1]:
            self.image = self.imgs[3:5][self.step_index // 5]
            self.step_index += 1
        # unless the player was already ducking, we apply an offset to the position
        if not self.state[2]:
            self.position[1] = self.game.ground_height - self.height
            self.position += self.duck_offset


In [33]:
class Game():
    """Keyboard-playable version of the dino runner.

    Owns the sprite lists, the current speed and the score, and provides the
    pygame main loop in run().
    """

    def __init__(self):
        self.points = 0

        self.ground_height = 380
        self.game_speed = 14
        self.all_sprites = []
        self.obstacles = []

        # One entry per asset folder, in the order the sprites unpack them.
        folders = ['Cactus', 'Bird', 'Dino', 'Other']
        file_names = [
            ['SmallCactus1.png', 'SmallCactus2.png', 'SmallCactus3.png','LargeCactus1.png', 'LargeCactus2.png', 'LargeCactus3.png'],
            ['Bird1.png', 'Bird2.png'],
            ['DinoRun1.png', 'DinoRun2.png', 'DinoJump.png', 'DinoDuck1.png', 'DinoDuck2.png'],
            ['Cloud.png', 'GameOver.png', 'Reset.png', 'Track.png'],
        ]
        loaded = []
        for folder, names in zip(folders, file_names):
            directory = 'Assets/' + folder
            loaded.append([pg.image.load(os.path.join(directory, name)) for name in names])

        cactus_imgs, bird_imgs, dino_imgs, other_imgs = loaded

        # Report every image's rect once all images have been loaded.
        for names, surfaces in zip(file_names, loaded):
            for name, surface in zip(names, surfaces):
                print(f'{name}: {surface.get_rect()}')

        # Creation order defines draw order (sprites append themselves to all_sprites).
        self.player = Dino(dino_imgs, (10,333), self)
        self.cactus= Cactus(cactus_imgs, (1100+random.randint(200,700), 300), self)
        #self.bird = Bird(bird_imgs, (1100+random.randint(800,1300), 250), self)
        self.background = Background(other_imgs[3], (0,self.ground_height-15), self)
        self.cloud = Cloud(other_imgs[0], (1100+random.randint(800,1000),(random.randint(50,100))), self)

    def reset(self):
        """Restore the initial speed and score and reset every sprite."""
        self.game_speed = 14
        self.points = 0
        for sprite in self.all_sprites:
            sprite.reset()

    def run(self):
        """Open a 1100x600 window and run the game at 30 FPS until it is closed.

        A collision restarts the game. Once the window is closed pygame is shut
        down and ``exit()`` is called.
        """
        pg.init()
        running = True
        clock = pg.time.Clock()
        self.screen = pg.display.set_mode((1100, 600))

        while running:
            for event in pg.event.get():
                if event.type == pg.QUIT:
                    running = False
            self.screen.fill((255,255,255))
            keys = pg.key.get_pressed()

            self.score()
            self.player.act(keys)

            # each sprite is drawn at its current position, then moved
            for sprite in self.all_sprites:
                sprite.draw(self.screen)
                sprite.update()

            if any(self.check_for_collision(obstacle) for obstacle in self.obstacles):
                self.reset()
            clock.tick(30)
            pg.display.update()

        pg.display.quit()
        pg.quit()
        exit()

    def check_for_collision(self, obstacle):
        """Return whether the obstacle's rectangle touches or overlaps the player's."""
        o_x, o_y = obstacle.position
        p_x, p_y = self.player.position
        overlaps_x = o_x + obstacle.width >= p_x and o_x <= p_x + self.player.width
        overlaps_y = o_y + obstacle.height >= p_y and o_y <= p_y + self.player.height
        return overlaps_x and overlaps_y

    def score(self):
        """Add one point, speed up every 100 points and draw the score label."""
        self.points += 1
        if self.points % 100 == 0:
            self.game_speed +=1
        font = pg.font.Font('freesansbold.ttf', 20)
        label = font.render('Score: ' + str(self.points), True, (0,0,0))
        label_rect = label.get_rect()
        label_rect.center = (1000,40)
        self.screen.blit(label, label_rect)
        

In [34]:
# Build the game once; Game.__init__ loads the sprite images from Assets/ and
# prints the rect of every loaded image.
game = Game()
# Uncomment to open the pygame window and play with the keyboard (UP = jump, DOWN = duck).
#game.run()

SmallCactus1.png: <rect(0, 0, 40, 71)>
SmallCactus2.png: <rect(0, 0, 68, 71)>
SmallCactus3.png: <rect(0, 0, 105, 71)>
LargeCactus1.png: <rect(0, 0, 48, 95)>
LargeCactus2.png: <rect(0, 0, 99, 95)>
LargeCactus3.png: <rect(0, 0, 102, 95)>
Bird1.png: <rect(0, 0, 97, 68)>
Bird2.png: <rect(0, 0, 93, 62)>
DinoRun1.png: <rect(0, 0, 87, 94)>
DinoRun2.png: <rect(0, 0, 88, 94)>
DinoJump.png: <rect(0, 0, 88, 94)>
DinoDuck1.png: <rect(0, 0, 118, 60)>
DinoDuck2.png: <rect(0, 0, 116, 60)>
Cloud.png: <rect(0, 0, 84, 101)>
GameOver.png: <rect(0, 0, 386, 40)>
Reset.png: <rect(0, 0, 75, 101)>
Track.png: <rect(0, 0, 2404, 28)>


In [47]:
class ChromeDinoEnv(Env):
    """Gym environment around the dino runner.

    Observation (``Box`` of shape ``(4,)``):
        ``[player_y, distance_to_nearest_obstacle, nearest_obstacle_y, game_speed]``
    Actions (``Discrete(3)``): 0 = run, 1 = jump, 2 = duck.
    Reward: 1 for every step.
    An episode ends when the player's rectangle touches an obstacle.

    Args:
        render_mode: ``None`` (no rendering), ``'human'`` (pygame window,
            rendered on every step) or ``'rgb_array'`` (``render()`` returns
            the frame as an array).
    """
    metadata = {'render_modes': ['human', 'rgb_array'], 'render_fps':30}

    def __init__(self, render_mode=None):
        super().__init__()
        self.window_size = (1100, 600)

        # NOTE(review): module-level globals kept from the original; nothing in
        # the visible code reads them (sprites use the attributes below).
        global ground_height, game_speed, all_sprites, obstacles
        ground_height = 380
        game_speed = 14
        all_sprites = []
        obstacles = []
        self.points = 0
        self.ground_height = 380
        self.game_speed = 14
        self.all_sprites = []
        self.obstacles = []

        imgs = []
        assets = ['Assets/'+i for i in ['Cactus', 'Bird', 'Dino', 'Other']]
        all_assets = [['SmallCactus1.png', 'SmallCactus2.png', 'SmallCactus3.png','LargeCactus1.png', 'LargeCactus2.png', 'LargeCactus3.png'], ['Bird1.png', 'Bird2.png'], ['DinoRun1.png', 'DinoRun2.png', 'DinoJump.png', 'DinoDuck1.png', 'DinoDuck2.png'], ['Cloud.png', 'GameOver.png', 'Reset.png', 'Track.png']]
        for idx, asset in enumerate(assets):
            imgs.append([pg.image.load(os.path.join(asset, i)) for i in all_assets[idx]])

        cactus_imgs, bird_imgs, dino_imgs, other_imgs = imgs

        # The environment itself plays the role of the sprites' ``game`` object.
        self.player = Dino(dino_imgs, (10,333), self)
        self.cactus= Cactus(cactus_imgs, (1100+random.randint(200,700), 300), self)
        #self.bird = Bird(bird_imgs, (1100+random.randint(800,1300), 250), self)
        self.background = Background(other_imgs[3], (0,self.ground_height-15), self)
        self.cloud = Cloud(other_imgs[0], (1100+random.randint(800,1000),(random.randint(50,100))), self)

        self.observation_space = Box(0, 5000, shape=(4,), dtype=float)

        # we have 3 actions: run(do nothing), jump, duck
        self.action_space = Discrete(3)

        self.game_over = False

        assert render_mode is None or render_mode in self.metadata['render_modes']
        self.render_mode = render_mode

        # created lazily on the first rendered frame
        self.window = None
        self.clock = None

    def check_for_collision(self, obstacle):
        """Return whether the obstacle's rectangle touches or overlaps the player's."""
        o_x, o_y = obstacle.position
        p_x, p_y = self.player.position
        o_w, o_h = obstacle.width, obstacle.height
        p_w, p_h = self.player.width, self.player.height
        return o_x + o_w >= p_x and o_x <= p_x + p_w and o_y + o_h >= p_y and o_y <= p_y + p_h

    def _get_obs(self):
        """Build the observation vector from the player and the nearest obstacle.

        Only obstacles with x > 0 are considered. When there is none, the
        obstacle x defaults to 5000 and the obstacle y to 0.
        """
        p_obs = self.player.get_obs()
        closest_obst = 5000
        # despite the name this holds the obstacle's y coordinate (obs index 1)
        closest_height = 0
        for obstacle in self.obstacles:
            obst = obstacle.get_obs()
            if 0 < obst[0] < closest_obst:
                closest_obst = obst[0]
                closest_height = obst[1]
        dist_to_obst = closest_obst - p_obs[0]
        player_height = p_obs[1]
        return np.array([player_height, dist_to_obst, closest_height, self.game_speed])

    def _advance_score(self):
        """Add one point and raise the game speed by one every 100 points."""
        self.points += 1
        if self.points % 100 == 0:
            self.game_speed +=1

    def step(self, action):
        """Advance the game by one tick.

        Args:
            action: 0 = run, 1 = jump, 2 = duck.

        Returns:
            ``(observation, reward, done, info)`` where ``info`` is
            ``{'score': points}``.
        """
        self.player.take_action(action)
        for sprite in self.all_sprites:
            sprite.update()

        # Score and speed progression belong to the game logic, not to
        # rendering: they must also advance when render_mode is None, which is
        # the mode used for training.
        self._advance_score()

        observation = self._get_obs()
        done = self.get_done()
        reward = 1
        info = self._get_info()

        if self.render_mode == 'human':
            self._render_frame()

        return observation, reward, done, info

    def render(self):
        """Return the current frame as an array in ``'rgb_array'`` mode, else None."""
        if self.render_mode == 'rgb_array':
            return self._render_frame()

    def _render_frame(self):
        """Draw all sprites; show them in the window or return them as an array.

        Drawing has no effect on score or speed, so it can be called any
        number of times per step.
        """
        if self.window is None and self.render_mode == 'human':
            pg.init()
            pg.font.init()
            pg.display.init()
            self.window = pg.display.set_mode(self.window_size)
        if self.clock is None and self.render_mode == 'human':
            self.clock = pg.time.Clock()

        self.screen = pg.Surface(self.window_size)
        self.screen.fill((255,255,255))

        for sprite in self.all_sprites:
            sprite.draw(self.screen)

        if self.render_mode == 'human':
            font = pg.font.Font('freesansbold.ttf', 20)
            text = font.render('Score: ' + str(self.points), True, (0,0,0))
            text_rect = text.get_rect()
            text_rect.center = (1000,40)
            self.window.blit(self.screen, self.screen.get_rect())
            # blit at the centred rect (as Game.score does) so that the label
            # is not anchored by its top-left corner next to the window edge
            self.window.blit(text, text_rect)
            pg.event.pump()
            pg.display.update()
            self.clock.tick(self.metadata['render_fps'])
        else:
            # pygame surfaces are indexed (x, y); transpose to (row, column, channel)
            return np.transpose(
            np.array(pg.surfarray.pixels3d(self.screen)), axes=(1,0,2))

    def reset(self):
        """Reset all sprites, the score and the speed; return the first observation."""
        # reset all sprites to a semi-random (specific to their class) location
        for sprite in self.all_sprites:
            sprite.reset()
        self.points = 0
        self.game_speed = 14
        return self._get_obs()

    def close(self):
        """Close the pygame window if one was opened."""
        if self.window is not None:
            pg.display.quit()
            pg.quit()
        self.window = None
        self.clock = None

    def get_done(self):
        """Return True when the player collides with any obstacle."""
        return any(self.check_for_collision(obstacle) for obstacle in self.obstacles)

    def _get_info(self):
        """Return the auxiliary info dict holding the current score."""
        return{'score': self.points}

In [36]:
class PPOMemory:
    """Rollout buffer for PPO: parallel lists of per-step quantities.

    Args:
        batch_size: number of transitions per mini-batch.
    """

    def __init__(self, batch_size):
        self.batch_size = batch_size
        self.clear_memory()

    def generate_batches(self):
        """Return the stored data as arrays plus shuffled mini-batch indices.

        Returns:
            ``(states, actions, probs, vals, rewards, dones, batches)`` where
            the first six are numpy arrays in insertion order and ``batches``
            is a list of index arrays that together cover every stored
            transition exactly once, in random order.
        """
        count = len(self.states)
        starts = np.arange(0, count, self.batch_size)
        order = np.arange(count, dtype=np.int64)
        np.random.shuffle(order)
        batches = [order[start:start+self.batch_size] for start in starts]

        columns = (self.states, self.actions, self.probs, self.vals, self.rewards, self.dones)
        states, actions, probs, vals, rewards, dones = (np.array(column) for column in columns)
        return states, actions, probs, vals, rewards, dones, batches

    def store_memory(self, state, action, probs, vals, reward, done):
        """Append one transition to the buffer."""
        columns = (self.states, self.actions, self.probs, self.vals, self.rewards, self.dones)
        for column, item in zip(columns, (state, action, probs, vals, reward, done)):
            column.append(item)

    def clear_memory(self):
        """Drop every stored transition."""
        self.states = []
        self.actions = []
        self.probs = []
        self.vals = []
        self.rewards = []
        self.dones = []

In [37]:
class ActorNetwork(keras.Model):
    """Policy network: two ReLU hidden layers followed by a softmax over actions.

    Args:
        n_actions: number of discrete actions (size of the output layer).
        fc1_dims: width of the first hidden layer.
        fc2_dims: width of the second hidden layer.
    """

    def __init__(self, n_actions, fc1_dims=256, fc2_dims=256):
        super().__init__()
        self.fc1 = Dense(fc1_dims, activation='relu')
        self.fc2 = Dense(fc2_dims, activation='relu')
        self.fc3 = Dense(n_actions, activation='softmax')

    def call(self, state):
        """Return the action probabilities for a batch of states."""
        hidden = self.fc2(self.fc1(state))
        return self.fc3(hidden)

In [38]:
class CriticNetwork(keras.Model):
    """Value network: two ReLU hidden layers followed by one linear output unit.

    Args:
        fc1_dims: width of the first hidden layer.
        fc2_dims: width of the second hidden layer.
    """

    def __init__(self, fc1_dims=256, fc2_dims=256):
        super().__init__()
        self.fc1 = Dense(fc1_dims, activation='relu')
        self.fc2 = Dense(fc2_dims, activation='relu')
        self.fc3 = Dense(1, activation=None)

    def call(self, state):
        """Return one value estimate per state of the batch."""
        hidden = self.fc2(self.fc1(state))
        return self.fc3(hidden)

In [39]:
class Agent():
    """PPO agent with separate actor and critic networks.

    Args:
        n_actions: number of discrete actions.
        input_dims: observation shape; accepted for interface compatibility
            but not used, because the Keras layers infer their input size.
        gamma: discount factor.
        alpha: learning rate of both Adam optimizers.
        gae_lambda: smoothing factor of the generalized advantage estimate.
        policy_clip: clipping range of the probability ratio.
        batch_size: mini-batch size used by the rollout memory.
        n_epochs: number of passes over the memory per learn() call.
        chkpt_dir: directory prefix for saved models.
    """

    def __init__(self, n_actions, input_dims, gamma=0.99, alpha=0.0003, gae_lambda=0.95, policy_clip=0.2, batch_size=64, n_epochs=10, chkpt_dir='models/'):
        self.gamma = gamma
        self.policy_clip = policy_clip
        self.n_epochs = n_epochs
        self.gae_lambda = gae_lambda
        self.chkpt_dir = chkpt_dir

        self.actor = ActorNetwork(n_actions)
        self.critic = CriticNetwork()
        self.actor.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha))
        self.critic.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha))

        self.memory = PPOMemory(batch_size)

    def store_transition(self, state, actions, probs, vals, reward, done):
        """Append one transition to the rollout memory.

        ``actions`` is the action taken in ``state``. It is forwarded from the
        parameter itself; the previous body read a global named ``action``,
        which only worked when the calling script happened to define one.
        """
        self.memory.store_memory(state, actions, probs, vals, reward, done)

    def save_models(self):
        """Save actor and critic below ``chkpt_dir``."""
        self.actor.save(self.chkpt_dir + 'actor')
        self.critic.save(self.chkpt_dir + 'critic')

    def load_models(self):
        """Replace actor and critic with the models saved below ``chkpt_dir``."""
        print('... loading models ...')
        self.actor = keras.models.load_model(self.chkpt_dir + 'actor')
        self.critic = keras.models.load_model(self.chkpt_dir + 'critic')

    def choose_action(self, observation):
        """Sample an action for a single observation.

        Returns:
            ``(action, log_prob, value)``: the sampled action index, its
            log-probability under the current policy and the critic's value
            estimate (first row of the critic output).
        """
        state = tf.convert_to_tensor([observation])

        probs = self.actor(state)
        # The actor ends in a softmax, so its output must be passed as
        # ``probs``; the first positional argument of Categorical is ``logits``.
        dist = tfp.distributions.Categorical(probs=probs)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        value = self.critic(state)

        action = action.numpy()[0]
        value = value.numpy()[0]
        log_prob = log_prob.numpy()[0]

        return action, log_prob, value

    def _compute_advantages(self, rewards, values, dones):
        """Generalized advantage estimate for every stored step.

        Equivalent to summing the discounted TD errors from each step to the
        end of the buffer, computed with a single backward pass. The last step
        has no successor value and keeps advantage 0.

        Args:
            rewards: 1-D sequence of rewards.
            values: 1-D sequence of critic values, same length as ``rewards``.
            dones: 1-D sequence of episode-end flags; a set flag removes the
                bootstrap from the next value.
        """
        n_steps = len(rewards)
        advantage = np.zeros(n_steps, dtype=np.float32)
        running = 0.0
        for t in range(n_steps - 2, -1, -1):
            delta = rewards[t] + self.gamma*values[t+1]*(1-int(dones[t])) - values[t]
            running = delta + self.gamma*self.gae_lambda*running
            advantage[t] = running
        return advantage

    def learn(self):
        """Run ``n_epochs`` of clipped-PPO updates on the memory, then clear it."""
        for _ in range(self.n_epochs):
            state_arr, action_arr, old_prob_arr, vals_arr, reward_arr, dones_arr, batches = self.memory.generate_batches()
            # choose_action stores values of shape (1,); flatten to (n,) so
            # that advantage + values below stays element-wise instead of
            # broadcasting to a (batch, batch) matrix.
            values = np.asarray(vals_arr, dtype=np.float32).reshape(-1)
            advantage = self._compute_advantages(reward_arr, values, dones_arr)

            for batch in batches:
                with tf.GradientTape(persistent=True) as tape:
                    states = tf.convert_to_tensor(state_arr[batch])
                    old_probs = tf.convert_to_tensor(old_prob_arr[batch])
                    actions = tf.convert_to_tensor(action_arr[batch])

                    probs = self.actor(states)
                    dist = tfp.distributions.Categorical(probs=probs)
                    new_probs = dist.log_prob(actions)

                    critic_value = self.critic(states)
                    critic_value = tf.squeeze(critic_value, 1)

                    # both quantities are log-probabilities
                    prob_ratio = tf.math.exp(new_probs - old_probs)

                    weighted_probs = advantage[batch] * prob_ratio
                    clipped_probs = tf.clip_by_value(prob_ratio, 1-self.policy_clip, 1+self.policy_clip)
                    weighted_clipped_probs = clipped_probs * advantage[batch]

                    actor_loss = -tf.math.minimum(weighted_probs, weighted_clipped_probs)
                    actor_loss = tf.math.reduce_mean(actor_loss)

                    returns = advantage[batch] + values[batch]
                    critic_loss = keras.losses.MSE(critic_value, returns)

                actor_params = self.actor.trainable_variables
                critic_params = self.critic.trainable_variables
                actor_grads = tape.gradient(actor_loss, actor_params)
                critic_grads = tape.gradient(critic_loss, critic_params)
                self.actor.optimizer.apply_gradients(zip(actor_grads, actor_params))
                self.critic.optimizer.apply_gradients(zip(critic_grads, critic_params))
                # a persistent tape holds its resources until it is released
                del tape

        self.memory.clear_memory()

In [40]:
# Smoke test: build an environment without rendering (render_mode defaults to None).
env = ChromeDinoEnv()
env._get_obs()  # return value is not used here
#env.observation_space.sample()
# reset() returns the initial observation:
# [player_y, distance_to_nearest_obstacle, nearest_obstacle_y, game_speed]
env.reset()

array([ 286., 2419.,  309.,   14.])

In [16]:
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker
from stable_baselines3 import DQN
import tensorflow as tf

In [18]:
# Run Stable-Baselines3's environment checker on the custom environment
# (relies on `env` created in an earlier cell).
env_checker.check_env(env)

In [19]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model every ``check_freq`` calls.

    Args:
        check_freq: number of callback calls between two saves.
        save_path: directory the snapshots are written to.
        verbose: verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the save directory when a path was given."""
        if self.save_path is None:
            return
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a snapshot named after the call count; always continue training."""
        due = self.n_calls % self.check_freq == 0
        if due:
            target = os.path.join(self.save_path, f'best_model{self.n_calls}')
            self.model.save(target)
        return True

In [20]:
# Directory the training callback writes model snapshots to.
CHECKPOINT_DIR = './train/'
# NOTE(review): LOG_DIR is not referenced by any visible cell - presumably meant for training logs.
LOG_DIR = './logs/'

In [21]:
# Save a snapshot every 1000 callback calls into CHECKPOINT_DIR.
callback = TrainAndLoggingCallback(check_freq=1000, save_path=CHECKPOINT_DIR)
# Called by hand so the checkpoint directory exists right away.
callback._init_callback()

In [22]:
# DQN baseline with an MLP policy on the custom environment (uses `env` from an earlier cell).
model = DQN('MlpPolicy', env, buffer_size=120000, learning_starts=1000)

In [23]:
# Train the DQN baseline for 5000 environment steps; the callback saves snapshots on the way.
model.learn(total_timesteps=5000, callback=callback)
#model.save('dqn_chrome_dino')

ValueError: Cell is empty

In [46]:
# PPO training loop on the custom environment.
# NOTE(review): every name assigned here is a notebook-level global; the later
# evaluation cell reuses several of them, so keep the names stable.
if __name__ == '__main__':
    env = ChromeDinoEnv()
    #env = gym.make('CartPole-v0')
    N = 20  # learn after every N environment steps
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size, alpha=alpha, n_epochs=n_epochs, input_dims=env.observation_space.shape)
    n_games = 2000
    
    # only used by the commented-out plot call at the end of this cell
    figure_file = 'plots/ChromeDino.png'
    
    # start from the lower bound of the reward range so the first average always counts as an improvement
    best_score = env.reward_range[0]
    score_hist = []
    
    learn_iters = 0
    avg_score = 0
    n_steps = 0
    
    
    for i in range(n_games):
        #env.reset()
        observation = env.reset()#[0]
        #print(f'speed: {env.game_speed}')
        done = False
        score = 0
        
        # play one episode
        while not done:
            action, prob, val = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            agent.store_transition(observation, action, prob, val, reward, done)
            # n_steps counts across episodes, so a learning step can use transitions from more than one episode
            if n_steps % N == 0:
                agent.learn()
                learn_iters += 1
            observation = observation_
            #print(f'observation:{observation}')
        score_hist.append(score)
        # running average over the last (up to) 100 episodes
        avg_score = np.mean(score_hist[-100:])
            
        # checkpoint whenever the running average improves
        if avg_score > best_score:
            best_score = avg_score
            agent.save_models()
        
        print('episode', i, 'score %.1f' % score, 'avg score %.1f'% avg_score, 'time_steps', n_steps, 'learning_steps', learn_iters)
    # x axis (1-based episode numbers) for the learning-curve plot
    x = [i+1 for i in range(len(score_hist))]
    #plot_learning_curve(x, score_hist, figure_file)

INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 0 score -88.5 avg score -88.5 time_steps 571 learning_steps 28
INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 1 score -3.0 avg score -45.8 time_steps 643 learning_steps 32
INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 2 score -41.0 avg score -44.2 time_steps 895 learning_steps 44
INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 3 score -6.0 avg score -34.6 time_steps 967 learning_steps 48
INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 4 score -29.5 avg score -33.6 time_steps 1044 learning_steps 52
episode 5 score -42.5 avg score -35.1 time_steps 1218 learning_steps 60
INFO:tensorflow:Ass

KeyboardInterrupt: 

In [25]:
# Environment that renders every step in a pygame window (the window is created on the first rendered frame).
env = ChromeDinoEnv(render_mode='human')

In [26]:
# Watch the saved PPO policy play one episode in a pygame window.
if __name__ == '__main__':
    env = ChromeDinoEnv(render_mode='human')
    N = 20
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size, alpha=alpha, n_epochs=n_epochs, input_dims=env.observation_space.shape)
    agent.load_models()

    figure_file = 'plots/ChromeDino.png'

    best_score = env.reward_range[0]
    score_hist = []

    learn_iters = 0
    avg_score = 0
    n_steps = 0

    # The episode return must start at zero in this cell; relying on a `score`
    # left over from the training cell fails on a fresh kernel.
    score = 0
    observation = env.reset()
    done = False
    try:
        while not done:
            action, prob, val = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            #agent.store_transition(observation, action, prob, val, reward, done)
            observation = observation_
    finally:
        # close the pygame window even when the episode is interrupted
        env.close()

    # one entry per episode, as in the training cell
    score_hist.append(score)
    avg_score = np.mean(score_hist[-100:])
        

... loading models ...


In [None]:
# Scratch check of random.randint: draw from 0..6 (both ends included) and
# print every draw until a value of at least 5 comes up.
x = 0
while True:
    x = random.randint(0,6)
    print(x)
    if x >= 5:
        break