In [1]:
# All imports go here in one place
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pyautogui
import cv2
import time
import os

# All stable-baselines3 imports
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.env_util import make_vec_env

# Your custom environment
from dino_env import DinoEnv

In [2]:
# --- 1. CONFIGURE YOUR TRAINING SESSION ---
SESSION_NAME = "PPO_Optimized_Run1"

# --- 2. SETUP DIRECTORIES ---
models_dir = f"models/{SESSION_NAME}"
logdir = f"logs/{SESSION_NAME}"

# exist_ok=True keeps this cell idempotent: re-running it never fails because
# the directories are already there.
os.makedirs(models_dir, exist_ok=True)
os.makedirs(logdir, exist_ok=True)

# --- 3. AUTOMATICALLY DETECT LATEST CHECKPOINT ---
def find_latest_checkpoint(directory):
    """Return the highest timestep among ``<timesteps>.zip`` files in ``directory``.

    Only files whose name is exactly ``<decimal digits>.zip`` count as
    checkpoints. Anything else (e.g. ``best_model.zip``, ``notes.txt``,
    ``20000.backup.zip``) is ignored instead of raising ``ValueError``.

    Args:
        directory: Path of the folder that holds the saved checkpoints.

    Returns:
        The largest timestep found as an ``int``, or ``0`` when the directory
        does not exist or contains no numeric checkpoint.
    """
    if not os.path.isdir(directory):
        return 0

    timesteps = []
    for filename in os.listdir(directory):
        stem, extension = os.path.splitext(filename)
        # isdecimal() guarantees int(stem) cannot fail and rejects empty stems.
        if extension == ".zip" and stem.isdecimal():
            timesteps.append(int(stem))

    return max(timesteps, default=0)


# 0 means "no checkpoint yet", i.e. start a fresh training run.
latest_checkpoint = find_latest_checkpoint(models_dir)

# --- 4. CREATE THE ENVIRONMENT ---
# One DinoEnv instance is wrapped into a vectorised env, which is then wrapped
# so that each observation is a stack of 4 consecutive frames
# (presumably so the policy can perceive motion -- confirm against DinoEnv).
env = VecFrameStack(
    make_vec_env(DinoEnv, n_envs=1),
    n_stack=4,
)

# --- 5. DEFINE LEARNING RATE ---
def linear_schedule(initial_value):
    """Build a schedule that scales ``initial_value`` by the remaining progress.

    Args:
        initial_value: Value the schedule yields when ``progress_remaining``
            is 1.

    Returns:
        A one-argument callable that maps ``progress_remaining`` to
        ``progress_remaining * initial_value``.
    """
    def schedule(progress_remaining):
        # Proportional to the remaining progress; evaluates to 0 at progress 0.
        scaled_value = progress_remaining * initial_value
        return scaled_value

    return schedule

# --- 6. LOAD MODEL OR CREATE NEW ONE ---
if latest_checkpoint > 0:
    # --- RESUME TRAINING ---
    MODEL_PATH = f"{models_dir}/{latest_checkpoint}.zip"
    print(f"✅ Resuming training from checkpoint: {MODEL_PATH}")
    # Optional: Reset learning rate to encourage new exploration.
    # The override must be supplied through `custom_objects`: PPO.load() builds
    # the model's learning-rate schedule while loading, so assigning
    # `model.learning_rate` afterwards would leave the schedule stored in the
    # checkpoint in effect.
    model = PPO.load(
        MODEL_PATH,
        env=env,
        tensorboard_log=logdir,
        custom_objects={"learning_rate": linear_schedule(0.00005)},
    )
else:
    # --- START FRESH TRAINING ---
    print(f"✅ Starting a new training session: {SESSION_NAME}")
    model = PPO(
        'CnnPolicy', env, n_steps=4096, gamma=0.99,
        learning_rate=linear_schedule(0.0001),
        verbose=1, tensorboard_log=logdir
    )

# --- 7. START THE TRAINING LOOP ---
TIMESTEPS_PER_INTERVAL = 20000
TOTAL_INTERVALS = 50 # Let's aim for 1 million total steps

# Calculate the starting interval based on our loaded checkpoint
# (checkpoint files are named after `interval * TIMESTEPS_PER_INTERVAL`).
starting_interval = (latest_checkpoint // TIMESTEPS_PER_INTERVAL) + 1

# NOTE(review): with reset_num_timesteps=False every learn() call appears to
# recompute `progress_remaining` against its own budget, so the linear schedule
# restarts each interval rather than decaying once over the whole run -- confirm
# against the stable-baselines3 version in use.
# NOTE(review): PPO collects rollouts in chunks of n_steps (4096 for a fresh
# model), so an interval may run slightly more than TIMESTEPS_PER_INTERVAL
# steps; the checkpoint file name is the nominal step count, not
# model.num_timesteps -- confirm.
try:
    for i in range(starting_interval, TOTAL_INTERVALS + 1):
        print(f"--- Training Interval {i} of {TOTAL_INTERVALS} ---")

        model.learn(total_timesteps=TIMESTEPS_PER_INTERVAL, reset_num_timesteps=False, tb_log_name=SESSION_NAME)

        save_path = f"{models_dir}/{i * TIMESTEPS_PER_INTERVAL}"
        model.save(save_path)
        print(f"Checkpoint saved to {save_path}.zip")

    print("--- Training complete! ---")
finally:
    # Always release the environment, even when training is interrupted by an
    # exception (e.g. the PyAutoGUI FailSafeException) or a KeyboardInterrupt.
    env.close()

Attempting to locate the game window...
Opening in existing browser session.
✅ Found 'replay_button.png'.
Game region calculated: {'left': 90, 'top': 235, 'width': 840, 'height': 200}
Score region calculated: {'left': 690, 'top': 235, 'width': 235, 'height': 50}
✅ Starting a new training session: PPO_Optimized_Run1
Using cpu device
Wrapping the env in a VecTransposeImage.
--- Training Interval 1 of 50 ---
Logging to logs/PPO_Optimized_Run1/PPO_Optimized_Run1_0


FailSafeException: PyAutoGUI fail-safe triggered from mouse moving to a corner of the screen. To disable this fail-safe, set pyautogui.FAILSAFE to False. DISABLING FAIL-SAFE IS NOT RECOMMENDED.

In [None]:
import pyautogui
import cv2
import numpy as np

# Visual sanity check: draw the capture regions on top of a full-screen
# screenshot so they can be compared with the actual game window.

# --- Paste the coordinates from your log output here ---
# Earlier calibration, kept for reference:
# {'left': 100, 'top': 265, 'width': 650, 'height': 180}
# NOTE(review): the training cell's recorded output reports left=90, top=235 --
# confirm these values match the current window position.
game_region = {'left': 80, 'top': 230, 'width': 840, 'height': 200}

# --- Calculate the score region based on the game region ---
# Same top edge as the game region, shifted 600 px to the right.
score_region = {
    'left': game_region['left'] + 600,
    'top': game_region['top'],
    'width': 235,
    'height': 50,
}

# OpenCV colours are given in BGR order.
GREEN_BGR = (0, 255, 0)
BLUE_BGR = (255, 0, 0)

# Grab the whole screen; pyautogui delivers RGB while OpenCV expects BGR.
frame = cv2.cvtColor(np.array(pyautogui.screenshot()), cv2.COLOR_RGB2BGR)

# GREEN outline -> game region, BLUE outline -> score region (2 px thick).
for region, colour in ((game_region, GREEN_BGR), (score_region, BLUE_BGR)):
    top_left = (region['left'], region['top'])
    bottom_right = (
        region['left'] + region['width'],
        region['top'] + region['height'],
    )
    cv2.rectangle(frame, top_left, bottom_right, colour, 2)

# Display the result in a new window
cv2.imshow("Region Test", frame)

for message in (
    "A window named 'Region Test' has opened.",
    "The GREEN box is the AI's main vision.",
    "The BLUE box is where the AI reads the score.",
    "Press any key on that window to close it.",
):
    print(message)

# Block until a key is pressed in the preview window, then tear it down.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# (empty scratch cell -- a stray `m` here raised NameError on Restart & Run All)