 1. Installer les dépendances

In [None]:
# Install Stable-Baselines3, Gymnasium, and Atari dependencies
!apt-get update -qq
!apt-get install -y xvfb swig cmake ffmpeg > /dev/null 2>&1
!pip install stable-baselines3[extra] gymnasium[atari,accept-rom-license] ale-py pyvirtualdisplay -q

# Set up a virtual display (needed for rendering in Colab)
from pyvirtualdisplay import Display
import os

os.system("Xvfb :1 -screen 0 1400x900x24 &")  # Manually start Xvfb
display = Display(visible=0, size=(1400, 900))
display.start()

print("Virtual display started successfully!")

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
[0mVirtual display started successfully!


2. Importer les bibliothèques

In [None]:
import gymnasium as gym
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# Sanity check: this import fails right here if the Arcade Learning
# Environment (ALE) package is missing, and the version is echoed for the record.
import ale_py

ale_version = ale_py.__version__
print("ALE-Py version:", ale_version)

ALE-Py version: 0.11.0


3. Créer l'environnement Space Invaders

In [None]:
# Create the Space Invaders environment
def make_env():
    """Build one Space Invaders environment wrapped in a ``Monitor``.

    The environment is created inside the factory instead of being captured
    from a global that is rebound right afterwards, so the factory stays
    correct no matter when the vectorized wrapper decides to call it.
    ``Monitor`` tracks per-episode reward and length, which Stable-Baselines3
    uses for its rollout logs and which ``evaluate_policy`` expects.

    Returns:
        The monitored Gymnasium environment, rendering to RGB arrays.
    """
    return Monitor(gym.make("ALE/SpaceInvaders-v5", render_mode="rgb_array"))

# Wrap it in a DummyVecEnv for Stable-Baselines3 compatibility
env = DummyVecEnv([make_env])

# Print action and observation space
print("Action Space:", env.action_space)
print("Observation Space:", env.observation_space)

Action Space: Discrete(6)
Observation Space: Box(0, 255, (210, 160, 3), uint8)


4. Définir le modèle Deep Q-Network (DQN)

In [None]:
# Define the DQN model
#
# Memory note: the environment cell reports raw observations of shape
# (210, 160, 3) uint8, i.e. 100,800 bytes per frame. A replay buffer of
# 100,000 transitions that stores `obs` and `next_obs` separately therefore
# needs about 20 GB. `optimize_memory_usage=True` stores each frame only once
# (about 10 GB); it requires `handle_timeout_termination=False`.
# NOTE(review): on machines with less RAM, lower `buffer_size` further or add
# Atari preprocessing (downscaling / grayscale) — confirm against the target runtime.
model = DQN(
    "CnnPolicy",  # Use a Convolutional Neural Network (CNN) to process images
    env,
    learning_rate=1e-4,  # Learning rate
    buffer_size=100000,  # Experience replay buffer size (number of transitions)
    batch_size=32,  # Mini-batch size
    learning_starts=50000,  # Steps collected before gradient updates start
    target_update_interval=1000,  # Update target network every 1000 steps
    train_freq=4,  # Update every 4 steps
    gradient_steps=1,  # Gradient updates per training round
    exploration_fraction=0.1,  # Fraction of total timesteps over which epsilon is annealed
    exploration_final_eps=0.01,  # Minimum exploration rate
    optimize_memory_usage=True,  # Halve replay-buffer memory (see note above)
    replay_buffer_kwargs={"handle_timeout_termination": False},  # Required by optimize_memory_usage
    verbose=1,  # Print training logs
    tensorboard_log="./dqn_space_invaders_logs/"  # Tensorboard logging
)

# Print model summary: the policy holds the actual network architecture,
# whereas printing the DQN object itself only shows its memory address.
print(model.policy)

Using cuda device
Wrapping the env in a VecTransposeImage.




<stable_baselines3.dqn.dqn.DQN object at 0x7b68f38754d0>


5. Entraîner l'agent 🎮

In [None]:
# Training budget (environment steps) and the file the trained agent is saved to
n_training_steps = 1_000_000
save_path = "dqn_space_invaders"

# Run the full training budget, then save the resulting model
model.learn(total_timesteps=n_training_steps)
model.save(save_path)

print("Model saved successfully!")

Logging to ./dqn_space_invaders_logs/DQN_2
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.982    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 732      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1782     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.952    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 871      |
|    time_elapsed     | 5        |
|    total_timesteps  | 4816     |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.936    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 893      |
|    time_elapsed     | 7        |
|    total_timesteps  | 6461     |
----------------------------------
------------

6. Évaluer l'agent entraîné

In [None]:
# Reload the agent from the "dqn_space_invaders" file saved after training
model = DQN.load("dqn_space_invaders")

# Play 10 evaluation episodes with deterministic action selection and collect
# the mean and standard deviation of the episode rewards
eval_stats = evaluate_policy(
    model,
    env,
    n_eval_episodes=10,
    deterministic=True,
)
mean_reward, std_reward = eval_stats

# Report the evaluation summary
print(f"Mean reward: {mean_reward:.2f} ± {std_reward:.2f}")

7. Regarder l'agent jouer ! 🎥

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import base64
from IPython.display import HTML

# Function to unwrap DummyVecEnv and record gameplay
def record_video(env, model, video_length=500, video_folder="videos/"):
    """Record the agent playing a single episode, capped at ``video_length`` steps.

    Args:
        env: Vectorized environment; only its first sub-environment
            (``env.envs[0]``) is used for the recording.
        model: Agent exposing ``predict(obs, deterministic=True)``.
        video_length: Maximum number of environment steps to record.
        video_folder: Directory handed to ``RecordVideo`` for the output files.

    Note:
        The recorder wraps the sub-environment itself rather than a copy.
        NOTE(review): closing the recorder is expected to close that wrapped
        environment as well — recreate ``env`` before reusing it afterwards.
    """
    base_env = env.envs[0]  # Unwrap the DummyVecEnv
    recorder = gym.wrappers.RecordVideo(
        base_env,
        video_folder=video_folder,
        episode_trigger=lambda episode_id: True,  # record every episode
    )

    # try/finally guarantees the recorder is closed even if predict() or
    # step() raises, so no encoder or file handle is left dangling.
    try:
        obs, _ = recorder.reset()
        for _ in range(video_length):
            action, _ = model.predict(obs, deterministic=True)
            obs, _, terminated, truncated, _ = recorder.step(action)
            if terminated or truncated:
                break
    finally:
        recorder.close()

    print("Video recorded successfully!")

# Function to display the recorded video
def show_video(video_folder="videos/"):
    """Return an HTML ``<video>`` element embedding the newest MP4 in a folder.

    Args:
        video_folder: Directory searched (non-recursively) for ``*.mp4`` files.

    Returns:
        An ``HTML`` object with the video inlined as a base64 data URL, or
        ``None`` (after printing a hint) when the folder holds no MP4 file.
    """
    video_list = glob.glob(os.path.join(video_folder, "*.mp4"))
    if not video_list:
        print("No videos found! Make sure the agent played at least one episode.")
        return None

    # glob returns files in arbitrary order, so pick the most recently
    # modified file explicitly instead of relying on position 0.
    video_path = max(video_list, key=os.path.getmtime)

    # Context manager closes the file handle as soon as the bytes are read.
    with open(video_path, "rb") as video_file:
        mp4 = video_file.read()

    data_url = "data:video/mp4;base64," + base64.b64encode(mp4).decode()
    return HTML(f'<video width="600" height="400" controls><source src="{data_url}" type="video/mp4"></video>')

# ✅ Record the gameplay: runs the agent in the first sub-environment of `env`
# and writes the video into the default "videos/" folder
record_video(env, model)

# ✅ Display the recorded video in Colab. This call must stay the last
# expression of the cell: the notebook renders the value it returns.
show_video()