##### Import the Libraries

In [7]:
import cv2
import numpy as np
import time
import keyboard
import pytesseract
from mss import mss
from gymnasium import Env  # Use gymnasium instead of gym
from gymnasium.spaces import Box, Discrete  # Use gymnasium.spaces
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

### Create an environment

In [8]:
class ChromeDinoEnv(Env):
    def __init__(self):
        super(ChromeDinoEnv, self).__init__()
        
        # Define action space: 0 = do nothing, 1 = jump, 2 = duck
        self.action_space = Discrete(3)
        
        # Define observation space (screen capture dimensions)
        self.observation_space = Box(low=0, high=255, shape=(300, 600, 3), dtype=np.uint8)
        
        # Screen capture setup
        self.sct = mss()
        self.game_window = {'top': 200, 'left': 100, 'width': 600, 'height': 300}  # Adjust based on your screen
        
        # Initialize game state
        self.game_over = False

    def reset(self, **kwargs):
        # Reset the game (press space to restart)
        keyboard.press_and_release('space')
        time.sleep(1)  # Wait for the game to restart
        
        # Capture the initial observation
        observation = self._get_observation()
        self.game_over = False
        return observation, {}  # Return observation and info (empty dict)

    def step(self, action):
        # Perform the action (0 = do nothing, 1 = jump, 2 = duck)
        if action == 1:
            keyboard.press_and_release('up')
        elif action == 2:
            keyboard.press_and_release('down')
        # Wait for a short time to allow the game to update
        time.sleep(0.1)
        
        # Capture the new observation
        observation = self._get_observation()
        
        # Check if the game is over
        self.game_over = self._check_game_over(observation)
        
        # Define reward and done flag
        reward = 1 if not self.game_over else -10
        done = self.game_over
        
        # Additional info (optional)
        info = {}
        
        return observation, reward, done, False, info  # Return truncated=False for Gymnasium

    def _get_observation(self):
        # Capture the screen
        screenshot = self.sct.grab(self.game_window)
        img = np.array(screenshot)
        
        # Convert to RGB (OpenCV uses BGR by default)
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
        
        # Resize if necessary (optional)
        img = cv2.resize(img, (600, 300))
        
        return img

    def _check_game_over(self, observation):
        # Use OCR to detect the "Game Over" text
        gray = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
        text = pytesseract.image_to_string(gray)
        
        # Check if "Game Over" is in the text
        if "Game" in text:
            return True
        return False

    def render(self, mode='human'):
        # Display the current observation (optional)
        cv2.imshow('Game', self._get_observation())
        cv2.waitKey(1)

    def close(self):
        # Clean up
        cv2.destroyAllWindows()

#### Train the Model

In [11]:
# Create the environment
env = ChromeDinoEnv()
env = Monitor(env, './dqn/logs')
env = DummyVecEnv([lambda: env])

# Define callbacks
checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./dqn/models/', name_prefix='dino_model')
eval_callback = EvalCallback(env, best_model_save_path='./dqn/models/', log_path='./dqn/logs/', eval_freq=500, deterministic=True, render=False)

# Initialize the DQN model with Double Q-Learning
# Initialize the DQN model
model = DQN(
    'CnnPolicy', 
    env, 
    verbose=1, 
    tensorboard_log='./dqn/tensorboard/', 
    learning_starts=1000, 
    buffer_size=50000, 
    learning_rate=0.0001, 
    batch_size=32, 
    tau=1.0, 
    gamma=0.99, 
    train_freq=4, 
    gradient_steps=1, 
    target_update_interval=1000, 
    exploration_fraction=0.1, 
    exploration_final_eps=0.01
)

# Train the model
model.learn(total_timesteps=100000, callback=[checkpoint_callback, eval_callback])

# Save the final model
model.save('./dqn/models/final_dino_model')

Using cpu device
Wrapping the env in a VecTransposeImage.




Logging to ./dqn/tensorboard/DQN_1




KeyboardInterrupt: 

### Load and Test the Model

In [None]:
# Load the model
model = DQN.load('./dqn/models/final_dino_model')

# Test the model
obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, truncated, info = env.step(action)
    env.render()
    if dones:
        obs = env.reset()

#### Display Metrics

In [None]:
import pandas as pd

# Load the logs
logs = pd.read_csv('./dqn/logs/monitor.csv', skiprows=1)

# Plot the rewards
plt.figure(figsize=(10, 5))
plt.plot(logs['r'])
plt.xlabel('Timesteps')
plt.ylabel('Rewards')
plt.title('Training Rewards Over Time')
plt.show()

# Plot the episode lengths
plt.figure(figsize=(10, 5))
plt.plot(logs['l'])
plt.xlabel('Timesteps')
plt.ylabel('Episode Length')
plt.title('Episode Length Over Time')
plt.show()