In [1]:
import sys
from pathlib import Path

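# Add the src folder (two directories up from the notebook's working directory)
# to sys.path so that the local `env` and `agent` packages can be imported below.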
src_path = Path("..") / ".."
sys.path.append(str(src_path))
%load_ext autoreload
%autoreload 2

import time
from env.snake_env import SnakeEnv
import random
from agent.utils import load_agent, list_agents

In [2]:
# Show the metadata of the saved agents. The argument 5 presumably selects the
# grid size (the output below is keyed by '5x5') — confirm against agent.utils.
list_agents(5)

{'5x5': [{'uuid': '4a114c0d-7b57-4d45-8a52-9fbdfba1dc28',
   'algorithm': 'PPO',
   'grid_size': 5,
   'n_envs': 4,
   'total_timesteps': 1000,
   'training_date': '20260113_205510',
   'agent_filename': 'agent_4a114c0d-7b57-4d45-8a52-9fbdfba1dc28.zip',
   'agent_path': 'saved_agents/5x5/agent_4a114c0d-7b57-4d45-8a52-9fbdfba1dc28.zip'}]}

In [3]:
def play_snake(
        grid_size: int = 5,
        agent_uuid: str | None = None,
        selection: str = "latest",
        max_steps: int = 100,
        delay: float = 0.5,
        random_play: bool = False
):
    """
    Joue au Snake soit avec un agent entraîné, soit aléatoirement.

    Args:
        grid_size: Taille de la grille du jeu (ex: 5 pour 5x5)
        agent_uuid: UUID spécifique d'un agent (complet ou 8 premiers caractères).
                    Si None, utilise 'selection'
        selection: 'latest' pour le dernier agent, 'random' pour un agent aléatoire
        max_steps: Nombre maximum de steps à jouer
        delay: Délai entre chaque action (en secondes)
        random_play: Si True, joue avec des actions aléatoires (ignore les agents)

    Returns:
        float: Score total de la partie
    """
    env = SnakeEnv(grid_size=grid_size, render_mode="human")
    obs, info = env.reset()

    if random_play:
        model = None
        print("Mode aléatoire activé - Le jeu choisit des actions au hasard.\n")
    else:
        try:
            model, agent_info = load_agent(
                grid_size=grid_size,
                agent_uuid=agent_uuid,
                selection=selection
            )
            print(f"Prêt à jouer avec l'agent !\n")
        except (FileNotFoundError, ValueError) as e:
            print(f"❌ Erreur: {e}")
            print("Passage en mode aléatoire...\n")
            model = None

    total_reward = 0

    for step in range(max_steps):
        if model:
            action, _states = model.predict(obs, deterministic=True)
        else:
            action = random.choice([0, 1, 2, 3])

        print(f"Step {step + 1} - Action: {action}")

        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        env.render()
        print(f"Récompense: {reward} | Score total: {total_reward}\n")

        if terminated or truncated:
            print(f"Partie terminée !")
            print(f"Score final: {total_reward}")
            print(f"Steps effectués: {step + 1}")
            print(f"Récompense finale: {total_reward}\n")
            break

        time.sleep(delay)

    env.close()


In [11]:
# Alternative (disabled): play on a 10x10 grid with the agent picked by selection="latest".
# play_snake(grid_size=10, selection="latest")
# Play one game on the 5x5 grid with a specific saved agent, identified by its full UUID.
play_snake(grid_size=5, agent_uuid="4a114c0d-7b57-4d45-8a52-9fbdfba1dc28")

✓ Agent chargé: agent_4a114c0d-7b57-4d45-8a52-9fbdfba1dc28.zip
  UUID: 4a114c0d-7b57-4d45-8a52-9fbdfba1dc28
  Algorithme: PPO
  Entraîné avec 1,000 timesteps
Prêt à jouer avec l'agent !

Step 1 - Action: 2
[37m┌──────────┐[0m
[37m│[0m          [37m│[0m
[37m│[0m          [37m│[0m
[37m│[0m  [32m■ [0m      [37m│[0m
[37m│[0m[31m■[0m         [37m│[0m
[37m│[0m          [37m│[0m
[37m└──────────┘[0m

Récompense: 0 | Score total: 0

Step 2 - Action: 1
[37m┌──────────┐[0m
[37m│[0m          [37m│[0m
[37m│[0m          [37m│[0m
[37m│[0m          [37m│[0m
[37m│[0m[31m■[0m [32m■ [0m      [37m│[0m
[37m│[0m          [37m│[0m
[37m└──────────┘[0m

Récompense: 0 | Score total: 0

Step 3 - Action: 2
[37m┌──────────┐[0m
[37m│[0m          [37m│[0m
[37m│[0m          [37m│[0m
[37m│[0m      [31m■[0m   [37m│[0m
[37m│[0m[32m■ [0m[33m■ [0m      [37m│[0m
[37m│[0m          [37m│[0m
[37m└──────────┘[0m

Récompense: 1 | Score total: 