In [None]:
# Install the notebook's dependencies with the %pip magic.
# NOTE(review): versions are unpinned — consider pinning them for reproducibility.
%pip install torch gymnasium ale-py stable-baselines3 tensorboard

In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
from torch.optim import RMSprop
from stable_baselines3.common.atari_wrappers import NoopResetEnv
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.monitor import Monitor
import ale_py

# Create the environment
def make_env():
    """Return a zero-argument factory that builds one Breakout environment.

    The environment itself is only created when the returned callable is
    invoked; the vectorised-env constructor below is given this callable.
    """
    def _init():
        # Wrapping order, innermost first: raw ALE env -> no-op reset -> Monitor.
        base_env = gym.make("ALE/Breakout-v5", render_mode="rgb_array")
        noop_env = NoopResetEnv(base_env, noop_max=30)
        return Monitor(noop_env)

    return _init

# Stable-Baselines3 trains on vectorised environments; wrap a single env.
env = DummyVecEnv([make_env()])


# Initialize the DQN model.
# The RMSprop optimiser is configured through `policy_kwargs` instead of being
# assigned to `model.policy.optimizer` after construction: the policy then
# builds the optimiser itself (using `learning_rate` below), and the optimiser
# choice is part of the model's construction arguments rather than a
# post-hoc patch that a later `DQN.load` would not know about.
model = DQN(
    "CnnPolicy",                    # Use a convolutional network policy
    env,
    learning_rate=0.00025,          # Learning rate, also used by the optimiser
    buffer_size=100000,             # Replay buffer capacity (transitions)
    learning_starts=2000,           # Start learning after 2,000 steps
    batch_size=32,                  # Training batch size
    gamma=0.99,                     # Discount factor for future rewards
    train_freq=4,                   # Train the model every 4 steps
    target_update_interval=10000,   # Update the target network every 10,000 steps
    exploration_fraction=0.1,       # Fraction of training over which epsilon is annealed
    exploration_final_eps=0.1,      # Final epsilon for exploration
    policy_kwargs=dict(
        optimizer_class=RMSprop,
        optimizer_kwargs=dict(
            alpha=0.95,             # Equivalent to squared gradient momentum
            momentum=0.95,          # Gradient momentum
            eps=0.01,               # Prevent division by zero
        ),
    ),
    verbose=1,
    tensorboard_log="./dqn_logs/",  # Path for TensorBoard logs
    device="cuda",
)

# Train the model
model.learn(total_timesteps=2_000_000)

# Save the trained model
model.save("dqn_breakout")

# Close the environment
env.close()


In [None]:
from gymnasium.wrappers import RecordVideo
import os

# Load the trained model
loaded_model = DQN.load("dqn_breakout")

# Output directory for the recording. It must be assigned before the
# environment is built, because the factory below reads it when called.
video_folder = "./videos/"


# Create the environment and wrap it for video recording
def make_env():
    """Return a zero-argument factory for a Breakout env that records video.

    The env is wrapped exactly once in ``RecordVideo``, writing to the
    module-level ``video_folder`` and recording only the first episode.
    Note that this redefinition replaces the training-cell ``make_env``.
    """
    def _init():
        env = gym.make("ALE/Breakout-v5", render_mode="rgb_array")
        env = NoopResetEnv(env, noop_max=30)  # Add No-op wrapper
        env = Monitor(env)                   # Add Monitor wrapper for logging
        # Record the first episode only (episode index 0)
        env = RecordVideo(env, video_folder=video_folder, episode_trigger=lambda x: x == 0)
        return env
    return _init


env = make_env()()

try:
    # Reset the environment and press FIRE to start the game
    obs, _ = env.reset()
    obs, reward, terminated, truncated, info = env.step(1)  # Press 'FIRE' action to start
    if terminated or truncated:
        obs, _ = env.reset()

    # Play one episode and record the video
    done = False
    while not done:
        action, _ = loaded_model.predict(obs, deterministic=True)  # Predict the action
        obs, reward, terminated, truncated, info = env.step(action)  # Step the environment
        done = terminated or truncated  # Check if the game is over
finally:
    # Always close the environment, even if the rollout fails part-way.
    env.close()
print(f"Video saved to: {video_folder}")


In [None]:
%load_ext tensorboard
%tensorboard --logdir=./dqn_basic_logs


Reusing TensorBoard on port 6006 (pid 46733), started 0:02:11 ago. (Use '!kill 46733' to kill it.)