In [9]:
import sys
import os
# Make the directory two levels above the current working directory
# importable, so the `src.*` imports in the following cell can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if project_root not in sys.path:
    sys.path.append(project_root)

print(sys.path)


['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/home/rfogel/projects/snakegame/venv/lib/python3.10/site-packages', '/home/rfogel/projects/snakegame']


In [10]:
from src.objects import gameengine
from src.objects.actorcriticalgorithm import ActorCriticAlgorithm
from src.objects.gameengine import GameEngine
from src.objects.actorcritic import ActorCritic
from src.objects.grid import Grid
from src.objects.rabbit import Rabbit
from src.objects.snake import Snake
from src.utils.utils import create_rabbits


In [11]:
import tensorflow as tf
import numpy as np
from numpy import random
import keras
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy.special import softmax


def plot_metrics(episode_rewards, losses):
    """Display the reward curve and the loss curve side by side.

    Args:
        episode_rewards: Sequence of values drawn in the left panel.
        losses: Sequence of values drawn in the right panel.

    Returns:
        None. A single 12x5 inch figure with two panels is shown via
        ``plt.show()``.
    """
    # One entry per panel: (series to draw, y-axis label, panel title).
    panels = (
        (episode_rewards, 'Récompenses Cumulées', 'Récompenses par Épisode'),
        (losses, 'Pertes', 'Pertes par Épisode'),
    )

    plt.figure(figsize=(12, 5))
    for position, (series, y_label, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(series)
        plt.xlabel('Épisodes')
        plt.ylabel(y_label)
        plt.title(heading)

    plt.tight_layout()
    plt.show()


def train_actor_critic(env: GameEngine,
                       algorithm: ActorCriticAlgorithm,
                       num_episodes: int = 1000):
    """Train ``algorithm`` online by playing ``num_episodes`` episodes on ``env``.

    At every step the model scores the rabbits, the scores of the rabbits
    currently present are turned into a probability distribution with a
    softmax, one rabbit index is sampled from it and passed to ``env.step``,
    and ``algorithm.train_step`` is called on the resulting transition.

    Metrics are recorded once per episode: the cumulated reward of the
    episode and the mean of its per-step losses. Every 100 episodes the
    averages over the last 100 episodes are printed, and both curves are
    plotted with ``plot_metrics`` when training ends.

    Args:
        env: Game engine; must provide ``reset``, ``get_state_tensor``,
            ``rabbits`` and ``step(action=...)`` returning
            ``(next_state, reward, done)``.
        algorithm: Provides ``model(state)`` returning a pair whose first
            element holds the per-rabbit scores, and
            ``train_step(state, action, reward, next_state, done)``
            returning the loss of that step.
        num_episodes: Number of episodes to play.

    Returns:
        None.
    """
    episode_rewards = []
    losses = []
    for episode in tqdm(range(num_episodes),
                        desc="Entrainement en cours",
                        unit="épisode"):

        env.reset(random_grid_size=False)
        state_tensor = env.get_state_tensor()
        done = False
        episode_reward = 0
        step_losses = []
        while not done:

            n_rabbits = len(env.rabbits)

            action_scores, _ = algorithm.model(state_tensor)
            # Keep only the outputs that map to rabbits still on the grid.
            # Flatten instead of squeezing: with a single rabbit left,
            # squeeze() would produce a 0-d array, which np.random.choice
            # rejects because `p` must be 1-dimensional.
            # NOTE(review): assumes the model is fed one state at a time
            # (leading dimension of size 1) -- confirm.
            action_scores = np.reshape(
                action_scores[:, :n_rabbits].numpy(), -1)
            action_probs = softmax(action_scores)
            chosen_rabbit_index = np.random.choice(n_rabbits, p=action_probs)

            next_state, reward, done = env.step(action=chosen_rabbit_index)
            episode_reward += reward

            loss = algorithm.train_step(state_tensor, chosen_rabbit_index,
                                        reward, next_state, done)
            state_tensor = next_state
            step_losses.append(loss)

        # Record metrics once per episode (not once per step) so that the
        # "last 100" averages below and the per-episode plots are meaningful.
        episode_rewards.append(episode_reward)
        losses.append(np.mean(step_losses))

        if (episode + 1) % 100 == 0:
            avg_reward = np.mean(episode_rewards[-100:])
            avg_loss = np.mean(losses[-100:])
            print(
                f"Épisode {episode + 1}/{num_episodes} | Récompense Moyenne : {avg_reward:.2f} | Pertes Moyennes : {avg_loss:.4f}"
            )
    print("Entraînement terminé.")
    plot_metrics(episode_rewards, losses)


In [12]:
# Train model from scratch
#
# The model's output size is fixed to the largest rabbit count that can ever
# be requested: RABBIT_DENSITY of the cells of the largest grid.
MIN_GRID_SIDE = 20
MAX_GRID_WIDTH, MAX_GRID_HEIGHT = 150, 100
RABBIT_DENSITY = 0.3
max_number_rabbits = int(MAX_GRID_WIDTH * MAX_GRID_HEIGHT * RABBIT_DENSITY)

snake = Snake()
algorithm = ActorCriticAlgorithm(ActorCritic(max_number_rabbits))

# `random` is numpy.random here: randint() excludes its upper bound, hence
# the "+ 1" so that the maximum width/height can actually be drawn.
width = random.randint(MIN_GRID_SIDE, MAX_GRID_WIDTH + 1)
height = random.randint(MIN_GRID_SIDE, MAX_GRID_HEIGHT + 1)

# Bound the rabbit count by the grid that was actually drawn, so a small grid
# is never asked to hold more rabbits than RABBIT_DENSITY of its cells (the
# global maximum alone could exceed the number of cells of a 20x20 grid).
# NOTE(review): assumes the 0.3 factor is meant as a per-grid density cap --
# confirm against create_rabbits.
max_rabbits_for_grid = max(1, int(width * height * RABBIT_DENSITY))
nb_lapins = random.randint(1, max_rabbits_for_grid + 1)

rabbits = create_rabbits(width=width, height=height, nb_lapins=nb_lapins)
grid = Grid(width=width, height=height)
# NOTE: this rebinding shadows the `gameengine` module imported earlier in the
# notebook; the name is kept because other cells may rely on it.
gameengine = GameEngine(snake=snake,
                        rabbits=rabbits,
                        grid=grid,
                        algorithm=algorithm)

# Run the training loop (default number of episodes).
train_actor_critic(env=gameengine, algorithm=algorithm)

KeyboardInterrupt: 

In [9]:
# Save the model held by `algorithm` to "model.keras". The path is relative,
# so the file is written to the kernel's current working directory.
# Requires the cell that creates `algorithm` to have been run first.
keras.saving.save_model(algorithm.model, "model.keras")

In [None]:
# Load the model previously saved to "model.keras" (relative to the kernel's
# current working directory). This cell only loads it; no training is started
# here.
# NOTE(review): if the saved model is a custom class, load_model may need
# `custom_objects` or a registered serializable class -- confirm.
model = keras.saving.load_model("model.keras")