In [1]:
import sys
import os
if os.path.abspath(os.path.join(os.getcwd(), '../..')) not in sys.path:
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../..')))


In [2]:
from src.objects.actorcriticalgorithm import ActorCriticAlgorithm
from src.objects.gameengine import GameEngine
from src.objects.actorcritic import ActorCritic
from src.objects.grid import Grid
from src.objects.snake import Snake
from src.utils.utils import create_rabbits
from typing import Tuple
import collections
import numpy as np
import keras
import statistics
import tqdm
import matplotlib.pyplot as plt
import tensorflow as tf
from PyQt5.QtCore import Qt
from src.utils.astar import Astar

2024-09-12 01:36:13.403196: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-12 01:36:13.411564: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-12 01:36:13.413976: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-12 01:36:13.420432: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
@tf.function
def run_episode(
    env: GameEngine,
    initial_state: tf.Tensor,
    model: tf.keras.Model,
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    action_probs = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    values = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    rewards = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
    state = initial_state
    done = tf.constant(False, dtype=tf.bool)
    step_index = 0
    tf.autograph.experimental.set_loop_options(
        shape_invariants=[(done, tf.TensorShape([])), (state, state.shape)])

    while not done:

        action_logits_step, value = model(state)
        action_logits_step = tf.reshape(action_logits_step, [1, -1])

        action = tf.random.categorical(action_logits_step, 1)[0, 0]

        action_probs_step = tf.nn.softmax(action_logits_step)
        values = values.write(step_index, tf.squeeze(value))

        action_probs = action_probs.write(step_index,
                                          action_probs_step[0, action])

        next_state, reward, done = env.tf_step(action=action)

        rewards = rewards.write(step_index, reward)

        # if tf.cast(done, tf.bool):
        #     break

        state = next_state
        step_index += 1

    action_probs = action_probs.stack()
    values = values.stack()
    rewards = rewards.stack()
    return action_probs, value, rewards

In [4]:
@tf.function
def train_step(env: GameEngine, initial_state: tf.Tensor,
               model: tf.keras.Model, gamma: float,
               optimizer: tf.keras.optimizers.Optimizer):
    with tf.GradientTape() as tape:

        action_probs, values, rewards = run_episode(
            env=env, initial_state=initial_state, model=model)

        returns = get_expected_return(rewards=rewards, gamma=gamma)

        loss = compute_loss(action_probs, values, returns)
        # Appliquer les gradients pour mettre à jour le modèle
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    episode_reward = tf.math.reduce_mean(rewards)
    return episode_reward


huber_loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)


def compute_loss(
    actions_probs: tf.Tensor,
    values: tf.Tensor,
    returns: tf.Tensor,
) -> tf.Tensor:
    """Compute the combined actor-critic loss"""
    advantage = returns - values
    action_log_probs = tf.math.log(actions_probs)
    actor_loss = -tf.math.reduce_sum(action_log_probs * advantage)
    critic_loss = huber_loss(values, returns)
    return actor_loss + critic_loss


eps = np.finfo(np.float32).eps.item()


def get_expected_return(rewards: tf.Tensor,
                        gamma: float,
                        standardize: bool = True) -> tf.Tensor:
    """
      Compute expected returns per timestep.
        La séquence de récompenses pour chaque pas de temps collecté au cours d'un épisode est convertie en une séquence de rendements attendus dans laquelle la somme des récompenses est prise du pas de temps actuel t à T et chaque fois la récompense est multipliée par un facteur de réduction décroissant de manière exponentielle gamma:
        Depuis gamma appartient à l'intervalle [0,1], les récompenses plus éloignées du pas de temps actuel ont moins de poids.
        Intuitivement, le rendement attendu implique simplement que les récompenses maintenant sont meilleures que les récompenses plus tard. Au sens mathématique, il s'agit de s'assurer que la somme des récompenses converge.
        Pour stabiliser la formation, la séquence résultante des rendements est également standardisée (c'est-à-dire pour avoir une moyenne nulle et un écart-type unitaire).
    """

    n = tf.shape(rewards)[0]
    returns = tf.TensorArray(dtype=tf.float32, shape=n)
    # Start from the end of `rewards` and accumulate reward sums
    # into the `returns` array
    rewards = tf.cast(rewards[::-1], dtype=tf.float32)
    discounted_sum = tf.constant(0.0)
    discounted_sum_shape = discounted_sum.shape
    for i in tf.range(n):
        reward = rewards[i]
        discounted_sum = reward + gamma * discounted_sum
        returns = returns.write(i, discounted_sum)
    returns = returns.stack()[::-1]
    if standardize:
        returns = ((returns - tf.math.reduce_mean(returns) /
                    (tf.math.reduce_std(returns) + eps)))
    return returns


In [5]:


n_directions = 4

snake = Snake(body=[(3, 3), (3, 4), (3, 5)], direction=Qt.Key_Up)
model = ActorCritic(n_directions)

# width, height = random.randint(20, 150), random.randint(20, 100)
width, height = 8, 8
nb_lapins = 1

rabbits = create_rabbits(width=width,
                         height=height,
                         n_rabbits=nb_lapins,
                         snake=snake)

grid = Grid(width=width, height=height)
algorithm = Astar(
)  #Objet qui ne servira à rien, uniquement pour l'instanciation de gameengine
gameengine = GameEngine(snake=snake,
                        rabbits=rabbits,
                        grid=grid,
                        algorithm=algorithm,
                        one_rabbit_mode=True)


In [6]:
min_episodes_criterion = 100
max_episodes = 10000
max_steps_per_episode = 1000

# Cartpole-v0 is considered solved if average reward is >= 195 over 100
# consecutive trials
reward_threshold = 195
running_reward = 0

# Discount factor for future rewards
gamma = 0.99
optimizer = tf.keras.optimizers.Adam(0.01)

# Keep last episodes reward
episodes_reward: collections.deque = collections.deque(
    maxlen=min_episodes_criterion)

with tqdm.trange(max_episodes) as t:
    for i in t:
        initial_state = tf.constant(gameengine.get_state_tensor(),
                                    dtype=tf.int32)
        episode_reward = int(
            train_step(env=gameengine,
                       initial_state=initial_state,
                       model=model,
                       optimizer=optimizer,
                       gamma=gamma))

        episodes_reward.append(episode_reward)
        running_reward = statistics.mean(episodes_reward)

        t.set_description(f'Episode {i}')
        t.set_postfix(episode_reward=episode_reward,
                      running_reward=running_reward)

        # Show average episode reward every 10 episodes
        if i % 10 == 0:
            pass  # print(f'Episode {i}: average reward: {avg_reward}')

        if running_reward > reward_threshold and i >= min_episodes_criterion:
            break

print(f'\nSolved at episode {i}: average reward: {running_reward:.2f}!')


I0000 00:00:1726097774.770502  141287 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-12 01:36:14.795153: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  0%|          | 0/10000 [00:00<?, ?it/s]

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: "set_loop_options" must be the first statement in the loop block
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: "set_loop_options" must be the first statement in the loop block


  0%|          | 0/10000 [00:00<?, ?it/s]


OperatorNotAllowedInGraphError: in user code:

    File "/tmp/ipykernel_141287/2632671524.py", line 7, in train_step  *
        action_probs, values, rewards = run_episode(
    File "/tmp/ipykernel_141287/2451937718.py", line 17, in run_episode
        while not done:

    OperatorNotAllowedInGraphError: Using a symbolic `tf.Tensor` as a Python `bool` is not allowed. You can attempt the following resolutions to the problem: If you are running in Graph mode, use Eager execution mode or decorate this function with @tf.function. If you are using AutoGraph, you can try decorating this function with @tf.function. If that does not work, then you may be using an unsupported feature or your source code may not be visible to AutoGraph. See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md#access-to-source-code for more information.


In [8]:
keras.saving.save_model(algorithm.model, "model.keras")

AttributeError: 'Astar' object has no attribute 'model'

In [None]:
# Train from existing model
model = keras.saving.load_model("model.keras")