In [36]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense
import tensorflow_probability as tfp
import gym
import numpy as np
import matplotlib.pyplot as plt
from gym import Env
from gym.spaces import Box, Discrete, Dict
from mss import mss
import pytesseract
import cv2
import sys
import os
import pygame as pg
import random
from gym.envs.registration import register
from setuptools import setup
!{sys.executable} --version

Python 3.7.13


In [37]:
class Sprite():
    # superclass for all objects in the game
    def __init__(self, img, pos, game):
        # each sprite gets a position, an image (or list of images) and a reference to the game itself
        self.position = np.array(pos, dtype=np.float32)
        self.image = img
        self.game = game
        if type(self.image) == list:
            self.height = self.image[0].get_rect()[3]
            self.width = self.image[0].get_rect()[2]
        else:
            self.height = self.image.get_rect()[3]
            self.width = self.image.get_rect()[2]
            
        # add the instance to the sprite list of the game
        self.game.all_sprites.append(self)
        
    def update(self):
        # every iteration, update the position of the object
        self.position[0] -= self.game.game_speed
        # if the object has moved off screen, reset
        if self.position[0] + self.width < 20:
            self.reset()
        self.height = self.image.get_rect()[3]
        self.width = self.image.get_rect()[2]
        
    def get_obs(self):
        # return the rect of the object (x, y, w, h)
        return np.append([self.position], [self.width, self.height])
        
    def draw(self, screen):
        # function to draw the object to the pg.display
        screen.blit(self.image, self.position)
        #pg.draw.rect(screen, (255,0,0), (self.position, (self.width, self.height)), 2)

    def reset(self):
        # resets the object to a location to the right of the display
        # we need to find a valid location for the reset position in order to not make the game impossible when obstacles are too close together
        min_dist = self.game.game_speed * 20
        max_dist = self.game.game_speed * 60
        max_x_pos = 0
        for obstacle in self.game.obstacles:
            if obstacle.position[0] > max_x_pos:
                max_x_pos = obstacle.position[0]
        # set the new position
        self.position[0] = max(max_x_pos + random.randint(min_dist, max_dist), 1100)

In [38]:
class Background(Sprite):
    # subclass for the background
    def __init__(self, img, pos, game, secondary=False):
        super(Background, self).__init__(img, pos, game)
        self.secondary = secondary
        # the actual game background consists of two intances of the Background class right behind each other
        if not self.secondary:
            new_bg = Background(self.image, (self.width, self.game.ground_height-15), self.game, secondary=True)

    def reset(self):
        # the reset during an episode until the environment is done
        self.position[0] = self.width
        if self.game.full_reset:
            # the reset if the environment is done
            if not self.secondary:
                self.position[0] = 0
            else:
                self.position[0] = self.width

In [39]:
class Cloud(Sprite):
    def __init__(self, img, pos, game):
        super(Cloud, self).__init__(img, pos, game)
        
    def reset(self):
        # reset for the cloud
        self.position[0] = 1100 + random.randint(2500,3000)
        self.position[1] = random.randint(50,100)

In [40]:
class Cactus(Sprite):
    def __init__(self, img, pos, game):
        super(Cactus, self).__init__(img, pos, game)
        self.position[1] = self.game.ground_height - self.height
        self.game.obstacles.append(self)
        self.imgs = self.image
        self.image = random.choice(self.imgs)
        
    def update(self):
        super(Cactus, self).update()
        self.position[1] = self.game.ground_height - self.height
        
    def reset(self):
        super(Cactus, self).reset()
        # get a new cactus image
        self.image = random.choice(self.imgs)
        self.game.point_counter = True
    

In [41]:
class Bird(Sprite):
    def __init__(self, img, pos, game):
        super(Bird, self).__init__(img, pos, game)
        self.game.obstacles.append(self)
        self.imgs = self.image
        self.image = self.imgs[0]
        self.step_index = 0
        self.elevation = random.randint(100,250)
        
    def update(self):
        self.position[1] = self.game.ground_height - self.elevation
        super(Bird, self).update()
        if self.step_index >= 9:
            self.step_index = 0
        self.image = self.imgs[self.step_index//5]
        self.step_index += 1
        
    def reset(self):
        super(Bird, self).reset()
        self.elevation = random.randint(50,200)
        self.game.point_counter = True

In [42]:
class Dino(Sprite):
    def __init__(self, img, pos, game):
        super(Dino, self).__init__(img, pos, game)
        self.imgs = img
        self.duck_offset = np.array([0,33], dtype=np.float32)
        self.jump_vel = 8.5
        
        # state = [running, jumping, ducking]
        self.state = np.array([True, False, False], dtype=bool)
        self.new_state = self.state
        self.step_index = 0
        
        self.position[1] = self.game.ground_height - self.height
        self.image = self.imgs[0]
        
    def update(self):
        pass
        
    def reset(self):
        self.position = np.array([10,self.game.ground_height-self.height], dtype=np.float32)
        self.image = self.imgs[0]
            
    def take_action(self, choice):
        # perform the chosen action
        if self.step_index >= 10:
            self.step_index = 0
            
        actions = [self.run, self.jump, self.duck]
        self.new_state = np.zeros(3, dtype=bool)
        self.new_state[choice] = True
            
        # unless the player is still in the air (jumping), apply new action
        if not self.state[1]:
            actions[choice]()
            self.state = self.new_state
        else:
            actions[1]()
                
    def run(self):
        # if not currently jumping or about to duck, run (do nothing)
        if not self.state[1] or self.new_state[2] or self.state[2]:
            # iterates through the images for animation
            self.image = self.imgs[:2][self.step_index // 5]
            self.step_index += 1
            self.position[1] = self.game.ground_height - self.height
    
    def jump(self):
        # jump
        self.image = self.imgs[2]
        if self.state[1]:
            self.position[1] -= self.jump_vel * 4
            self.jump_vel -= 0.8
        if self.position[1] + self.height >= self.game.ground_height:
            self.state[1] = False
            self.jump_vel = 10
            self.position[1] = self.game.ground_height - self.height
    
    def duck(self):
        if not self.state[1]:
            # iterates through the images for animation
            self.image = self.imgs[3:5][self.step_index // 5]
            self.step_index += 1
        # unless the player was already ducking, we apply an offset to the position
        if not self.state[2]:
            self.position[1] = self.game.ground_height - self.height
            self.position += self.duck_offset


In [1]:
class ChromeDinoEnv(Env):
    # gym Env subclass, follows gym documentation
    metadata = {'render_modes': ['human', 'rgb_array'], 'render_fps':30}
    
    def __init__(self, render_mode=None):
        super().__init__()
        self.window_size = (1100, 600)

        ground_height = 380
        game_speed = 14
        all_sprites = []
        obstacles = []
        self.points = 0
        self.ground_height = 380
        self.game_speed = 14
        self.full_reset = True
        self.all_sprites = []
        self.obstacles = []
        self.point_counter = False
        
        imgs = []
        # get all images for the game animation
        assets = ['Assets/'+i for i in ['Cactus', 'Bird', 'Dino', 'Other']]
        all_assets = [['SmallCactus1.png', 'SmallCactus2.png', 'SmallCactus3.png','LargeCactus1.png', 'LargeCactus2.png', 'LargeCactus3.png'], ['Bird1.png', 'Bird2.png'], ['DinoRun1.png', 'DinoRun2.png', 'DinoJump.png', 'DinoDuck1.png', 'DinoDuck2.png'], ['Cloud.png', 'GameOver.png', 'Reset.png', 'Track.png']]
        for idx, asset in enumerate(assets):
            imgs.append([pg.image.load(os.path.join(asset, i)) for i in all_assets[idx]])
            
        cactus_imgs, bird_imgs, dino_imgs, other_imgs = imgs
        
        # initialize all Objects in the game
        self.player = Dino(dino_imgs, (10,333), self)
        self.cactus= Cactus(cactus_imgs, (1100+random.randint(200,700), 300), self)
        #self.bird = Bird(bird_imgs, (1100+random.randint(800,1300), 250), self)
        self.background = Background(other_imgs[3], (0,self.ground_height-15), self)
        self.cloud = Cloud(other_imgs[0], (1100+random.randint(800,1000),(random.randint(50,100))), self)
        
        """    
        old code:
        observations are dictionaries with the sprites rectangles (x, y, w, h)
        self.observation_space = Dict(
            {
            "player": Box(0, 2000, shape=(4,), dtype=float),
            "bird": Box(0, 2000, shape=(4,), dtype=float),
            "cactus": Box(0, 2000, shape=(4,), dtype=float),
            "speed": Box(0, 50000, shape=(1,), dtype=int)
            }
        )
        """     
        
        # observation_space with shape (4,): player height, distance to next obstacle, obstacle height, bool is_jumping
        self.observation_space = Box(low=np.array([0., -100., 0., 0.]), high=np.array([400., 5000., 400., 1.]), dtype=np.float32)
   
        # we have 3 discrete actions: run(do nothing), jump, duck
        self.action_space = Discrete(3)
        
        self.game_over = False
        
        assert render_mode is None or render_mode in self.metadata['render_modes']
        self.render_mode = render_mode
        
        self.window = None
        self.clock = None
        
    def check_for_collision(self, obstacle):
        # checks if two rectangles collide
        o_x, o_y = obstacle.position
        p_x, p_y = self.player.position
        o_w, o_h = obstacle.width, obstacle.height
        p_w, p_h = self.player.width, self.player.height
        return o_x + o_w >= p_x and o_x <= p_x + p_w and o_y + o_h >= p_y and o_y <= p_y + p_h
   
        
    def _get_obs(self):
        # get location and size of relevant objects
        #return {"player": self.player.get_obs(), "bird": self.bird.get_obs(), "cactus": self.cactus.get_obs(), "speed": game_speed}
        
        p_obs = self.player.get_obs()
        closest_obst = 10000
        closest_height = 0
        # get the closest obstacle
        for obstacle in self.obstacles:
            obst = obstacle.get_obs()
            if obst[0] < closest_obst:
                # only take into account obstacles that are still in front of the player
                if obst[0] + obst[3] > p_obs[0]:
                    closest_obst = obst[0]
                    closest_height = obst[1]
        # get the distance to the closest obstacle
        dist_to_obst = closest_obst - p_obs[0]
        player_height = p_obs[1]
        
        is_jumping = False
        if player_height < 285:
            is_jumping = True
        game_speed = self.game_speed

        return np.array([player_height, dist_to_obst, closest_height, is_jumping], dtype=np.float32)
        
    
    def step(self, action):
        # perform a player action
        self.player.take_action(action)
        # update all game objects
        for sprite in self.all_sprites:
            sprite.update()
            
        self.points += 1
        #if self.points % 100 == 0:
         #   self.game_speed +=1
        
        # get relevant information from the environment
        observation = self._get_obs()
        done = self.get_done()
        self.full_reset = done
        
        #reward = 1 
        
        if self.point_counter:
            reward = 10
            self.point_counter = False
        elif not observation[3]:
            reward = 0.1
        else:
            reward = 0
            
        if done:
            reward = -10
        """
        possible additional rewards:
        if observation[3]:
            reward = -0.5
        if observation[1] < 0:
            reward = 3
        """
            
        info = self._get_info()
        
        # renders the Environment if render_mode is set to human
        if self.render_mode == 'human':
            self._render_frame()
            
        return observation, reward, done, info
    
    def render(self):
        if self.render_mode == 'rgb_array':
            return self._render_frame()
        
    def _render_frame(self):
        # render the environment using pygame
        if self.window is None and self.render_mode == 'human':
            pg.init()
            pg.font.init()
            pg.display.init()
            self.window = pg.display.set_mode((1100, 600))
        if self.clock is None and self.render_mode == 'human':
            self.clock = pg.time.Clock()
        
        # init the screen
        self.screen = pg.Surface((1100, 600))
        self.screen.fill((255,255,255))
            
        # draw all game objects on the screen
        for sprite in self.all_sprites:
            sprite.draw(self.screen)
            
        if self.render_mode == 'human':
            font = pg.font.Font('freesansbold.ttf', 20)    
            text = font.render('Score: ' + str(self.points), True, (0,0,0))
            text_rect = text.get_rect()
            text_rect.center = (1000,40)
            self.window.blit(self.screen, self.screen.get_rect())
            self.window.blit(text, (1000,40))
            pg.event.pump()
            pg.display.update()
            self.clock.tick(self.metadata['render_fps'])
        else:
            return np.transpose(
            np.array(pg.surfarray.pixels3d(self.screen)), axes=(1,0,2))
        
    
    def reset(self):
        # reset all sprites to a semi-random (specific to their class) location 
        for sprite in self.all_sprites:
            sprite.reset()
        # reset game stats
        self.point_counter = False
        self.points = 0
        self.game_speed = 14
        observation =  self._get_obs()
        info = self._get_info()
        return observation #, info
    
    def close(self):
        # close the pg.display
        if self.window is not None:
            pg.display.quit()
            pg.quit()
        self.window = None
        self.clock = None
 
    def get_done(self):
        # checks the terminal state for the env (player collides with obstacle)
        for obstacle in self.obstacles:
            if self.check_for_collision(obstacle):
                return True
        return False
    
    def _get_info(self):
        return{'score': self.points}
    
    def play(self):
        # lets a human play the game
        self.render_mode = 'human'
        # take a step to initialize the pygame display in env._render_frame()
        if self.window is None:
            self.reset()
            self.step(0)
            done = False
        while not done:
            # get the input
            user_input = pg.key.get_pressed()
            # jump
            if user_input[pg.K_UP]:
                action =  1
            # duck 
            elif user_input[pg.K_DOWN]:            
                action =  2
            # run
            else:
                action = 0
            _, _, done, _ = self.step(action)
        self.close()

NameError: name 'Env' is not defined

In [44]:
class PPOMemory:
    def __init__(self, batch_size):
        self.states = []
        self.actions = []
        self.probs = []
        self.vals = []
        self.rewards = []
        self.dones = []
        
        self.batch_size = batch_size
        
    def generate_batches(self):
        n_states = len(self.states)
        batch_start = np.arange(0, n_states, self.batch_size)
        indices = np.arange(n_states, dtype=np.int64)
        np.random.shuffle(indices)
        batches = [indices[i:i+self.batch_size] for i in batch_start]
        
        return np.array(self.states), np.array(self.actions), np.array(self.probs), np.array(self.vals), np.array(self.rewards), np.array(self.dones), batches
    
    def store_memory(self, state, action, probs, vals, reward, done):
        self.states.append(state)
        self.actions.append(action)
        self.probs.append(probs)
        self.vals.append(vals)
        self.rewards.append(reward)
        self.dones.append(done)
        
    def clear_memory(self):
        self.states = []
        self.probs = []
        self.actions = []
        self.rewards = []
        self.dones = []
        self.vals = []

In [32]:
class ActorNetwork(keras.Model):
    def __init__(self, n_actions, fc1_dims=256, fc2_dims=256):
        super(ActorNetwork, self).__init__()
        self.fc1 = Dense(fc1_dims, activation='relu')
        self.fc2 = Dense(fc2_dims, activation='relu')
        self.fc3 = Dense(n_actions, activation='softmax')
        
    def call(self, state):
        x = self.fc1(state)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [33]:
class CriticNetwork(keras.Model):
    def __init__(self, fc1_dims=256, fc2_dims=256):
        super(CriticNetwork, self).__init__()
        self.fc1 = Dense(fc1_dims, activation='relu')
        self.fc2 = Dense(fc2_dims, activation='relu')
        self.fc3 = Dense(1, activation=None)
        
    def call(self, state):
        x = self.fc1(state)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [34]:
class Agent():
    def __init__(self, n_actions, input_dims, gamma=0.99, alpha=0.0003, gae_lambda=0.95, policy_clip=0.2, batch_size=64, n_epochs=10, chkpt_dir='models/'):
        self.gamma = gamma
        self.policy_clip = policy_clip
        self.n_epochs = n_epochs
        self.gae_lambda = gae_lambda
        self.chkpt_dir = chkpt_dir
        
        self.actor = ActorNetwork(n_actions)
        self.critic = CriticNetwork()
        self.actor.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha))
        self.critic.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=alpha))
        
        self.memory = PPOMemory(batch_size)
        
    def store_transition(self, state, actions, probs, vals, reward, done):
        self.memory.store_memory(state, action, probs, vals, reward, done)
        
    def save_models(self):
        self.actor.save(self.chkpt_dir + 'actor')
        self.critic.save(self.chkpt_dir + 'critic')
        
    def load_models(self):
        print('... loading models ...')
        self.actor = keras.models.load_model(self.chkpt_dir + 'actor')
        self.critic = keras.models.load_model(self.chkpt_dir + 'critic')
        
    def choose_action(self, observation):
        state = tf.convert_to_tensor([observation])
        
        probs = self.actor(state)
        dist = tfp.distributions.Categorical(probs)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        value = self.critic(state)
        
        action = action.numpy()[0]
        value = value.numpy()[0]
        log_prob = log_prob.numpy()[0]
        
        return action, log_prob, value
    
    def learn(self):
        for epoch in range(self.n_epochs):
            state_arr, action_arr, old_prob_arr, vals_arr, reward_arr, dones_arr, batches = self.memory.generate_batches()
            values = vals_arr
            advantage = np.zeros(len(reward_arr), dtype=np.float32)
            
            for t in range(len(reward_arr)-1):
                discount = 1
                a_t = 0
                for k in range(t, len(reward_arr)-1):
                    a_t += discount*(reward_arr[k] + self.gamma*values[k+1]*(1-int(dones_arr[k])) - values[k])
                    discount *= self.gamma*self.gae_lambda
                advantage[t] = a_t
                    
            for batch in batches:
                with tf.GradientTape(persistent=True) as tape:
                    states = tf.convert_to_tensor(state_arr[batch])
                    old_probs = tf.convert_to_tensor(old_prob_arr[batch])
                    actions = tf.convert_to_tensor(action_arr[batch])
                    
                    probs = self.actor(states)
                    dist = tfp.distributions.Categorical(probs)
                    new_probs = dist.log_prob(actions)
                    
                    critic_value = self.critic(states)
                    critic_value = tf.squeeze(critic_value, 1)
                    
                    prob_ratio = tf.math.exp(new_probs - old_probs)
                    
                    weighted_probs = advantage[batch] * prob_ratio
                    clipped_probs = tf.clip_by_value(prob_ratio, 1-self.policy_clip, 1+self.policy_clip)
                    weighted_clipped_probs = clipped_probs * advantage[batch]
                    
                    actor_loss = -tf.math.minimum(weighted_probs, weighted_clipped_probs)
                    actor_loss = tf.math.reduce_mean(actor_loss)
                    
                    returns = advantage[batch] + values[batch]
                    critic_loss = keras.losses.MSE(critic_value, returns)
                
                actor_params = self.actor.trainable_variables
                critic_params = self.critic.trainable_variables
                actor_grads = tape.gradient(actor_loss, actor_params)
                critic_grads = tape.gradient(critic_loss, critic_params)
                self.actor.optimizer.apply_gradients(zip(actor_grads, actor_params))
                self.critic.optimizer.apply_gradients(zip(critic_grads, critic_params))
                
        self.memory.clear_memory()

In [19]:
if __name__ == '__main__':    
    env = ChromeDinoEnv()
    env.play()


In [22]:
if __name__ == '__main__':
    env = ChromeDinoEnv()
    #env = gym.make('CartPole-v0')
    N = 32
    batch_size = 8
    n_epochs = 8
    alpha = 0.0003
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size, alpha=alpha, n_epochs=n_epochs, input_dims=env.observation_space.shape)
    n_games = 2000
    
    figure_file = 'plots/ChromeDino.png'
    
    best_score = env.reward_range[0]
    score_hist = []
    
    learn_iters = 0
    avg_score = 0
    n_steps = 0
    
    
    for i in range(n_games):
        #env.reset()
        observation = env.reset()#[0]
        #print(f'speed: {env.game_speed}')
        done = False
        score = 0
        
        while not done:
            action, prob, val = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            n_steps += 1
            score += reward
            agent.store_transition(observation, action, prob, val, reward, done)
            if n_steps % N == 0:
                agent.learn()
                learn_iters += 1
            observation = observation_
            #print(f'observation:{observation}')
        score_hist.append(score)
        avg_score = np.mean(score_hist[-100:])
            
        if avg_score > best_score:
            best_score = avg_score
            agent.save_models()
        
        print('episode', i, 'score %.1f' % score, 'avg score %.1f'% avg_score, 'time_steps', n_steps, 'learning_steps', learn_iters)
    env.close()

  "Box bound precision lowered by casting to {}".format(self.dtype)


INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 0 score 164.0 avg score 164.0 time_steps 164 learning_steps 8
INFO:tensorflow:Assets written to: models/actor/assets
INFO:tensorflow:Assets written to: models/critic/assets
episode 1 score 1078.0 avg score 621.0 time_steps 1242 learning_steps 62
episode 2 score 81.0 avg score 441.0 time_steps 1323 learning_steps 66
episode 3 score 80.0 avg score 350.8 time_steps 1403 learning_steps 70
episode 4 score 82.0 avg score 297.0 time_steps 1485 learning_steps 74
episode 5 score 82.0 avg score 261.2 time_steps 1567 learning_steps 78
episode 6 score 333.0 avg score 271.4 time_steps 1900 learning_steps 95
episode 7 score 84.0 avg score 248.0 time_steps 1984 learning_steps 99
episode 8 score 80.0 avg score 229.3 time_steps 2064 learning_steps 103
episode 9 score 168.0 avg score 223.2 time_steps 2232 learning_steps 111
episode 10 score 80.0 avg score 210.2 time_steps 2312 learning_

episode 108 score 77.0 avg score 258.6 time_steps 27920 learning_steps 1396
episode 109 score 84.0 avg score 257.7 time_steps 28004 learning_steps 1400
episode 110 score 83.0 avg score 257.8 time_steps 28087 learning_steps 1404
episode 111 score 500.0 avg score 261.9 time_steps 28587 learning_steps 1429
episode 112 score 245.0 avg score 259.4 time_steps 28832 learning_steps 1441
episode 113 score 504.0 avg score 263.7 time_steps 29336 learning_steps 1466
episode 114 score 245.0 avg score 265.3 time_steps 29581 learning_steps 1479
episode 115 score 249.0 avg score 267.0 time_steps 29830 learning_steps 1491
episode 116 score 81.0 avg score 267.0 time_steps 29911 learning_steps 1495
episode 117 score 736.0 avg score 273.5 time_steps 30647 learning_steps 1532
episode 118 score 76.0 avg score 272.6 time_steps 30723 learning_steps 1536
episode 119 score 81.0 avg score 271.7 time_steps 30804 learning_steps 1540
episode 120 score 81.0 avg score 271.7 time_steps 30885 learning_steps 1544
episod

episode 216 score 250.0 avg score 191.9 time_steps 49099 learning_steps 2454
episode 217 score 500.0 avg score 189.5 time_steps 49599 learning_steps 2479
episode 218 score 81.0 avg score 189.6 time_steps 49680 learning_steps 2484
episode 219 score 81.0 avg score 189.6 time_steps 49761 learning_steps 2488
episode 220 score 80.0 avg score 189.6 time_steps 49841 learning_steps 2492
episode 221 score 81.0 avg score 189.6 time_steps 49922 learning_steps 2496
episode 222 score 81.0 avg score 189.5 time_steps 50003 learning_steps 2500
episode 223 score 84.0 avg score 189.6 time_steps 50087 learning_steps 2504
episode 224 score 81.0 avg score 189.6 time_steps 50168 learning_steps 2508
episode 225 score 82.0 avg score 189.6 time_steps 50250 learning_steps 2512
episode 226 score 82.0 avg score 189.6 time_steps 50332 learning_steps 2516
episode 227 score 246.0 avg score 189.6 time_steps 50578 learning_steps 2528
episode 228 score 165.0 avg score 190.4 time_steps 50743 learning_steps 2537
episode 

episode 324 score 81.0 avg score 295.9 time_steps 79763 learning_steps 3988
episode 325 score 330.0 avg score 298.4 time_steps 80093 learning_steps 4004
episode 326 score 246.0 avg score 300.1 time_steps 80339 learning_steps 4016
episode 327 score 566.0 avg score 303.3 time_steps 80905 learning_steps 4045
episode 328 score 317.0 avg score 304.8 time_steps 81222 learning_steps 4061
episode 329 score 79.0 avg score 299.1 time_steps 81301 learning_steps 4065
episode 330 score 163.0 avg score 299.9 time_steps 81464 learning_steps 4073
episode 331 score 330.0 avg score 302.4 time_steps 81794 learning_steps 4089
episode 332 score 248.0 avg score 304.1 time_steps 82042 learning_steps 4102
episode 333 score 81.0 avg score 304.1 time_steps 82123 learning_steps 4106
episode 334 score 165.0 avg score 304.9 time_steps 82288 learning_steps 4114
episode 335 score 1820.0 avg score 322.3 time_steps 84108 learning_steps 4205
episode 336 score 80.0 avg score 319.8 time_steps 84188 learning_steps 4209
ep

episode 431 score 1244.0 avg score 291.6 time_steps 110954 learning_steps 5547
episode 432 score 332.0 avg score 292.4 time_steps 111286 learning_steps 5564
episode 433 score 80.0 avg score 292.4 time_steps 111366 learning_steps 5568
episode 434 score 2655.0 avg score 317.3 time_steps 114021 learning_steps 5701
episode 435 score 396.0 avg score 303.1 time_steps 114417 learning_steps 5720
episode 436 score 80.0 avg score 303.1 time_steps 114497 learning_steps 5724
episode 437 score 82.0 avg score 303.1 time_steps 114579 learning_steps 5728
episode 438 score 80.0 avg score 303.1 time_steps 114659 learning_steps 5732
episode 439 score 80.0 avg score 292.3 time_steps 114739 learning_steps 5736
episode 440 score 82.0 avg score 289.1 time_steps 114821 learning_steps 5741
episode 441 score 829.0 avg score 296.6 time_steps 115650 learning_steps 5782
episode 442 score 819.0 avg score 304.0 time_steps 116469 learning_steps 5823
episode 443 score 79.0 avg score 304.0 time_steps 116548 learning_st

episode 537 score 82.0 avg score 328.8 time_steps 147461 learning_steps 7373
episode 538 score 82.0 avg score 328.8 time_steps 147543 learning_steps 7377
episode 539 score 81.0 avg score 328.9 time_steps 147624 learning_steps 7381
episode 540 score 249.0 avg score 330.5 time_steps 147873 learning_steps 7393
episode 541 score 1252.0 avg score 334.8 time_steps 149125 learning_steps 7456
episode 542 score 80.0 avg score 327.4 time_steps 149205 learning_steps 7460
episode 543 score 250.0 avg score 329.1 time_steps 149455 learning_steps 7472
episode 544 score 80.0 avg score 321.6 time_steps 149535 learning_steps 7476
episode 545 score 81.0 avg score 321.6 time_steps 149616 learning_steps 7480
episode 546 score 1479.0 avg score 333.9 time_steps 151095 learning_steps 7554
episode 547 score 76.0 avg score 333.9 time_steps 151171 learning_steps 7558
episode 548 score 331.0 avg score 332.3 time_steps 151502 learning_steps 7575
episode 549 score 81.0 avg score 332.4 time_steps 151583 learning_ste

episode 643 score 83.0 avg score 308.8 time_steps 180339 learning_steps 9016
episode 644 score 83.0 avg score 308.9 time_steps 180422 learning_steps 9021
episode 645 score 84.0 avg score 308.9 time_steps 180506 learning_steps 9025
episode 646 score 81.0 avg score 294.9 time_steps 180587 learning_steps 9029
episode 647 score 80.0 avg score 295.0 time_steps 180667 learning_steps 9033
episode 648 score 82.0 avg score 292.5 time_steps 180749 learning_steps 9037
episode 649 score 83.0 avg score 292.5 time_steps 180832 learning_steps 9041
episode 650 score 83.0 avg score 292.5 time_steps 180915 learning_steps 9045
episode 651 score 82.0 avg score 291.6 time_steps 180997 learning_steps 9049
episode 652 score 164.0 avg score 288.3 time_steps 181161 learning_steps 9058
episode 653 score 83.0 avg score 288.3 time_steps 181244 learning_steps 9062
episode 654 score 1332.0 avg score 295.0 time_steps 182576 learning_steps 9128
episode 655 score 666.0 avg score 300.0 time_steps 183242 learning_steps 

episode 749 score 82.0 avg score 275.9 time_steps 208425 learning_steps 10421
episode 750 score 82.0 avg score 275.9 time_steps 208507 learning_steps 10425
episode 751 score 649.0 avg score 281.6 time_steps 209156 learning_steps 10457
episode 752 score 81.0 avg score 280.8 time_steps 209237 learning_steps 10461
episode 753 score 82.0 avg score 280.8 time_steps 209319 learning_steps 10465
episode 754 score 80.0 avg score 268.2 time_steps 209399 learning_steps 10469
episode 755 score 82.0 avg score 262.4 time_steps 209481 learning_steps 10474
episode 756 score 1230.0 avg score 273.9 time_steps 210711 learning_steps 10535
episode 757 score 329.0 avg score 273.8 time_steps 211040 learning_steps 10552
episode 758 score 418.0 avg score 277.2 time_steps 211458 learning_steps 10572
episode 759 score 81.0 avg score 275.5 time_steps 211539 learning_steps 10576
episode 760 score 566.0 avg score 277.0 time_steps 212105 learning_steps 10605
episode 761 score 81.0 avg score 275.3 time_steps 212186 l

episode 854 score 163.0 avg score 254.1 time_steps 234808 learning_steps 11740
episode 855 score 84.0 avg score 254.1 time_steps 234892 learning_steps 11744
episode 856 score 247.0 avg score 244.3 time_steps 235139 learning_steps 11756
episode 857 score 1056.0 avg score 251.6 time_steps 236195 learning_steps 11809
episode 858 score 80.0 avg score 248.2 time_steps 236275 learning_steps 11813
episode 859 score 167.0 avg score 249.0 time_steps 236442 learning_steps 11822
episode 860 score 167.0 avg score 245.0 time_steps 236609 learning_steps 11830
episode 861 score 164.0 avg score 245.9 time_steps 236773 learning_steps 11838
episode 862 score 82.0 avg score 244.2 time_steps 236855 learning_steps 11842
episode 863 score 81.0 avg score 244.2 time_steps 236936 learning_steps 11846
episode 864 score 80.0 avg score 225.3 time_steps 237016 learning_steps 11850
episode 865 score 81.0 avg score 225.3 time_steps 237097 learning_steps 11854
episode 866 score 81.0 avg score 223.7 time_steps 237178 

episode 959 score 82.0 avg score 270.2 time_steps 263466 learning_steps 13173
episode 960 score 1238.0 avg score 280.9 time_steps 264704 learning_steps 13235
episode 961 score 1641.0 avg score 295.7 time_steps 266345 learning_steps 13317
episode 962 score 79.0 avg score 295.7 time_steps 266424 learning_steps 13321
episode 963 score 80.0 avg score 295.7 time_steps 266504 learning_steps 13325
episode 964 score 80.0 avg score 295.7 time_steps 266584 learning_steps 13329
episode 965 score 82.0 avg score 295.7 time_steps 266666 learning_steps 13333
episode 966 score 81.0 avg score 295.7 time_steps 266747 learning_steps 13337
episode 967 score 398.0 avg score 294.6 time_steps 267145 learning_steps 13357
episode 968 score 81.0 avg score 294.6 time_steps 267226 learning_steps 13361
episode 969 score 165.0 avg score 293.8 time_steps 267391 learning_steps 13369
episode 970 score 165.0 avg score 294.6 time_steps 267556 learning_steps 13377
episode 971 score 253.0 avg score 296.3 time_steps 267809

episode 1063 score 83.0 avg score 219.1 time_steps 288414 learning_steps 14420
episode 1064 score 165.0 avg score 219.9 time_steps 288579 learning_steps 14428
episode 1065 score 319.0 avg score 222.3 time_steps 288898 learning_steps 14444
episode 1066 score 75.0 avg score 222.3 time_steps 288973 learning_steps 14448
episode 1067 score 82.0 avg score 219.1 time_steps 289055 learning_steps 14452
episode 1068 score 84.0 avg score 219.1 time_steps 289139 learning_steps 14456
episode 1069 score 84.0 avg score 218.3 time_steps 289223 learning_steps 14461
episode 1070 score 168.0 avg score 218.3 time_steps 289391 learning_steps 14469
episode 1071 score 82.0 avg score 216.6 time_steps 289473 learning_steps 14473
episode 1072 score 81.0 avg score 216.6 time_steps 289554 learning_steps 14477
episode 1073 score 81.0 avg score 207.4 time_steps 289635 learning_steps 14481
episode 1074 score 1912.0 avg score 225.7 time_steps 291547 learning_steps 14577
episode 1075 score 83.0 avg score 225.7 time_st

episode 1167 score 82.0 avg score 302.9 time_steps 319342 learning_steps 15967
episode 1168 score 423.0 avg score 306.3 time_steps 319765 learning_steps 15988
episode 1169 score 82.0 avg score 306.2 time_steps 319847 learning_steps 15992
episode 1170 score 1554.0 avg score 320.1 time_steps 321401 learning_steps 16070
episode 1171 score 163.0 avg score 320.9 time_steps 321564 learning_steps 16078
episode 1172 score 411.0 avg score 324.2 time_steps 321975 learning_steps 16098
episode 1173 score 82.0 avg score 324.2 time_steps 322057 learning_steps 16102
episode 1174 score 81.0 avg score 305.9 time_steps 322138 learning_steps 16106
episode 1175 score 164.0 avg score 306.7 time_steps 322302 learning_steps 16115
episode 1176 score 167.0 avg score 307.6 time_steps 322469 learning_steps 16123
episode 1177 score 83.0 avg score 304.3 time_steps 322552 learning_steps 16127
episode 1178 score 81.0 avg score 304.3 time_steps 322633 learning_steps 16131
episode 1179 score 81.0 avg score 304.3 time_

episode 1271 score 331.0 avg score 252.2 time_steps 346779 learning_steps 17338
episode 1272 score 245.0 avg score 250.5 time_steps 347024 learning_steps 17351
episode 1273 score 162.0 avg score 251.3 time_steps 347186 learning_steps 17359
episode 1274 score 166.0 avg score 252.1 time_steps 347352 learning_steps 17367
episode 1275 score 82.0 avg score 251.3 time_steps 347434 learning_steps 17371
episode 1276 score 83.0 avg score 250.5 time_steps 347517 learning_steps 17375
episode 1277 score 163.0 avg score 251.3 time_steps 347680 learning_steps 17384
episode 1278 score 82.0 avg score 251.3 time_steps 347762 learning_steps 17388
episode 1279 score 83.0 avg score 251.3 time_steps 347845 learning_steps 17392
episode 1280 score 668.0 avg score 257.2 time_steps 348513 learning_steps 17425
episode 1281 score 1056.0 avg score 265.2 time_steps 349569 learning_steps 17478
episode 1282 score 78.0 avg score 264.4 time_steps 349647 learning_steps 17482
episode 1283 score 82.0 avg score 257.7 time

episode 1374 score 80.0 avg score 321.4 time_steps 379492 learning_steps 18974
episode 1375 score 81.0 avg score 321.4 time_steps 379573 learning_steps 18978
episode 1376 score 166.0 avg score 322.2 time_steps 379739 learning_steps 18986
episode 1377 score 81.0 avg score 321.4 time_steps 379820 learning_steps 18991
episode 1378 score 84.0 avg score 321.4 time_steps 379904 learning_steps 18995
episode 1379 score 983.0 avg score 330.4 time_steps 380887 learning_steps 19044
episode 1380 score 81.0 avg score 324.6 time_steps 380968 learning_steps 19048
episode 1381 score 82.0 avg score 314.8 time_steps 381050 learning_steps 19052
episode 1382 score 81.0 avg score 314.8 time_steps 381131 learning_steps 19056
episode 1383 score 81.0 avg score 314.8 time_steps 381212 learning_steps 19060
episode 1384 score 81.0 avg score 314.0 time_steps 381293 learning_steps 19064
episode 1385 score 248.0 avg score 315.6 time_steps 381541 learning_steps 19077
episode 1386 score 163.0 avg score 316.4 time_ste

episode 1478 score 81.0 avg score 245.6 time_steps 404463 learning_steps 20223
episode 1479 score 169.0 avg score 237.4 time_steps 404632 learning_steps 20231
episode 1480 score 161.0 avg score 238.2 time_steps 404793 learning_steps 20239
episode 1481 score 81.0 avg score 238.2 time_steps 404874 learning_steps 20243
episode 1482 score 80.0 avg score 238.2 time_steps 404954 learning_steps 20247
episode 1483 score 81.0 avg score 238.2 time_steps 405035 learning_steps 20251
episode 1484 score 166.0 avg score 239.1 time_steps 405201 learning_steps 20260
episode 1485 score 164.0 avg score 238.2 time_steps 405365 learning_steps 20268
episode 1486 score 81.0 avg score 237.4 time_steps 405446 learning_steps 20272
episode 1487 score 164.0 avg score 238.2 time_steps 405610 learning_steps 20280
episode 1488 score 83.0 avg score 238.2 time_steps 405693 learning_steps 20284
episode 1489 score 250.0 avg score 239.9 time_steps 405943 learning_steps 20297
episode 1490 score 80.0 avg score 235.0 time_s

episode 1582 score 249.0 avg score 262.3 time_steps 431183 learning_steps 21559
episode 1583 score 166.0 avg score 263.1 time_steps 431349 learning_steps 21567
episode 1584 score 249.0 avg score 264.0 time_steps 431598 learning_steps 21579
episode 1585 score 248.0 avg score 264.8 time_steps 431846 learning_steps 21592
episode 1586 score 83.0 avg score 264.8 time_steps 431929 learning_steps 21596
episode 1587 score 80.0 avg score 264.0 time_steps 432009 learning_steps 21600
episode 1588 score 246.0 avg score 265.6 time_steps 432255 learning_steps 21612
episode 1589 score 80.0 avg score 263.9 time_steps 432335 learning_steps 21616
episode 1590 score 667.0 avg score 269.8 time_steps 433002 learning_steps 21650
episode 1591 score 566.0 avg score 273.8 time_steps 433568 learning_steps 21678
episode 1592 score 80.0 avg score 272.1 time_steps 433648 learning_steps 21682
episode 1593 score 250.0 avg score 273.8 time_steps 433898 learning_steps 21694
episode 1594 score 83.0 avg score 267.9 time

episode 1686 score 164.0 avg score 273.8 time_steps 459309 learning_steps 22965
episode 1687 score 82.0 avg score 273.8 time_steps 459391 learning_steps 22969
episode 1688 score 81.0 avg score 272.2 time_steps 459472 learning_steps 22973
episode 1689 score 164.0 avg score 273.0 time_steps 459636 learning_steps 22981
episode 1690 score 2297.0 avg score 289.3 time_steps 461933 learning_steps 23096
episode 1691 score 166.0 avg score 285.3 time_steps 462099 learning_steps 23104
episode 1692 score 1473.0 avg score 299.2 time_steps 463572 learning_steps 23178
episode 1693 score 76.0 avg score 297.5 time_steps 463648 learning_steps 23182
episode 1694 score 83.0 avg score 297.5 time_steps 463731 learning_steps 23186
episode 1695 score 252.0 avg score 299.2 time_steps 463983 learning_steps 23199
episode 1696 score 244.0 avg score 288.5 time_steps 464227 learning_steps 23211
episode 1697 score 80.0 avg score 288.5 time_steps 464307 learning_steps 23215
episode 1698 score 740.0 avg score 292.6 ti

episode 1790 score 82.0 avg score 293.5 time_steps 491282 learning_steps 24564
episode 1791 score 163.0 avg score 293.5 time_steps 491445 learning_steps 24572
episode 1792 score 167.0 avg score 280.4 time_steps 491612 learning_steps 24580
episode 1793 score 164.0 avg score 281.3 time_steps 491776 learning_steps 24588
episode 1794 score 83.0 avg score 281.3 time_steps 491859 learning_steps 24592
episode 1795 score 81.0 avg score 279.6 time_steps 491940 learning_steps 24597
episode 1796 score 402.0 avg score 281.1 time_steps 492342 learning_steps 24617
episode 1797 score 78.0 avg score 281.1 time_steps 492420 learning_steps 24621
episode 1798 score 80.0 avg score 274.5 time_steps 492500 learning_steps 24625
episode 1799 score 167.0 avg score 270.4 time_steps 492667 learning_steps 24633
episode 1800 score 167.0 avg score 263.1 time_steps 492834 learning_steps 24641
episode 1801 score 419.0 avg score 266.5 time_steps 493253 learning_steps 24662
episode 1802 score 1000.0 avg score 274.8 tim

episode 1894 score 499.0 avg score 220.5 time_steps 513913 learning_steps 25695
episode 1895 score 505.0 avg score 224.8 time_steps 514418 learning_steps 25720
episode 1896 score 81.0 avg score 221.6 time_steps 514499 learning_steps 25724
episode 1897 score 80.0 avg score 221.6 time_steps 514579 learning_steps 25728
episode 1898 score 83.0 avg score 221.6 time_steps 514662 learning_steps 25733
episode 1899 score 163.0 avg score 221.6 time_steps 514825 learning_steps 25741
episode 1900 score 82.0 avg score 220.7 time_steps 514907 learning_steps 25745
episode 1901 score 317.0 avg score 219.7 time_steps 515224 learning_steps 25761
episode 1902 score 75.0 avg score 210.5 time_steps 515299 learning_steps 25764
episode 1903 score 917.0 avg score 218.8 time_steps 516216 learning_steps 25810
episode 1904 score 752.0 avg score 225.5 time_steps 516968 learning_steps 25848
episode 1905 score 82.0 avg score 225.5 time_steps 517050 learning_steps 25852
episode 1906 score 82.0 avg score 222.3 time_s

episode 1998 score 83.0 avg score 242.0 time_steps 538858 learning_steps 26942
episode 1999 score 409.0 avg score 244.4 time_steps 539267 learning_steps 26963


In [74]:
if __name__ == '__main__':
    env = ChromeDinoEnv(render_mode='human')
    N = 20
    batch_size = 5
    n_epochs = 4
    alpha = 0.0003
    agent = Agent(n_actions=env.action_space.n, batch_size=batch_size, alpha=alpha, n_epochs=n_epochs, input_dims=env.observation_space.shape)
    agent.load_models() 
    
    figure_file = 'plots/ChromeDino.png'
    
    best_score = env.reward_range[0]
    score_hist = []
    
    learn_iters = 0
    avg_score = 0
    n_steps = 0
    
    
    observation = env.reset()
    done = False
    while not done:
        action, prob, val = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        n_steps += 1
        score += reward
        #agent.store_transition(observation, action, prob, val, reward, done)
        observation = observation_
        score_hist.append(score)
        avg_score = np.mean(score_hist[-100:])
        

... loading models ...


  "Box bound precision lowered by casting to {}".format(self.dtype)


14


In [40]:
env = ChromeDinoEnv()
#env._get_obs()
#env.observation_space.sample()
#env.reset()

array([ 286., 2419.,  309.,   14.])

In [16]:
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker
from stable_baselines3 import DQN
import tensorflow as tf

In [18]:
env_checker.check_env(env)

In [19]:
class TrainAndLoggingCallback(BaseCallback):
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        
    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
            
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, f'best_model{self.n_calls}')
            self.model.save(model_path)
        return True

In [20]:
CHECKPOINT_DIR = './train/'
LOG_DIR = './logs/'

In [21]:
callback = TrainAndLoggingCallback(check_freq=1000, save_path=CHECKPOINT_DIR)
callback._init_callback()

In [22]:
model = DQN('MlpPolicy', env, buffer_size=120000, learning_starts=1000)

In [35]:
#model.learn(total_timesteps=5000, callback=callback)
#model.save('dqn_chrome_dino')

In [25]:
env = ChromeDinoEnv(render_mode='human')