# Go project
Auteur du notebook : Roc de Larouzière

**INSTALLATIONS ET IMPORTS** des dépendances nécessaires du projet

In [None]:
# Clean the /content working directory: remove the sample_data folder and
# every regular file at the top level of /content.
!rm -rf /content/sample_data/
!rm -f /content/*

# Download the project archive, unpack it here and list the resulting files.
!wget https://www.lamsade.dauphine.fr/~cazenave/project2025.zip
!unzip project2025.zip
!ls -l

# Install pinned versions of the TensorRT bindings and of TensorFlow.
!pip install tensorrt-bindings==8.6.1
#!pip install --extra-index-url https://pypi.nvidia.com tensorrt-libs
#!pip install tensorflow[and-cuda]==2.15.0
!pip install tensorflow==2.15.0

import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers as L, regularizers, Model
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers.schedules import CosineDecay
import gc
import matplotlib.pyplot as plt
import logging
import csv
import golois

**TRAITEMENT DES DONNEES**

In [None]:
planes = 31      # number of feature planes describing one board position
moves = 361      # one class per intersection of a 19x19 board (19 * 19 = 361)
N = 10000        # number of examples held in the in-memory buffers
filters = 32     # number of filters of the convolutional layers

# All buffers below are allocated with placeholder content (random values or
# zeros) and are then handed to the golois calls.

# Network input: N boards of 19x19 with `planes` binary planes, as float32.
input_data = np.random.randint(2, size=(N, 19, 19, planes))
input_data = input_data.astype('float32')

# Policy target: one move index per example, one-hot encoded.
# num_classes is passed explicitly so the array is always (N, moves); without
# it the width is inferred from the largest index that happens to be drawn.
policy = np.random.randint(moves, size=(N,))
policy = keras.utils.to_categorical(policy, num_classes=moves)

# Value target: one binary label per example, as float32.
value = np.random.randint(2, size=(N,))
value = value.astype('float32')

# Two 19x19 planes per example.
# NOTE(review): the original author describes these as the final board state,
# probably one territory plane for black and one for white -- confirm in golois.
end = np.random.randint(2, size=(N, 19, 19, 2))
end = end.astype('float32')

# One 19x19 plane per example, initialised to zero.
# NOTE(review): described by the original author as stone-group information
# that is filled in during training -- confirm in golois.
groups = np.zeros((N, 19, 19, 1))
groups = groups.astype('float32')

# Hand the buffers to golois.getValidation.
# NOTE(review): per the original author's note this loads (or generates)
# validation.data and prepares the validation examples -- not verifiable here.
print("getValidation", flush=True)
golois.getValidation(input_data, policy, value, end)

# PARTIE SUR LES RESIDUAL NETWORKS

## Modèle ResNetSanSe

In [None]:
def create_res_block(x, filters, block_index):
    """Apply one residual block: Conv-BN-ReLU, Conv-BN, skip-add, ReLU.

    Args:
        x: Input feature map. It is added back to the block output, so its
            channel count must be compatible with `filters`.
        filters: Number of filters of both 3x3 convolutions.
        block_index: Index embedded in every layer name of the block so that
            names stay unique across blocks.

    Returns:
        The output tensor of the block.
    """
    # The original built a GlorotNormal initializer here but never passed it
    # to any layer; the unused local has been dropped (layers keep their
    # default initializer, exactly as before).
    residual = x

    # First convolution of the block.
    x = L.Conv2D(filters, 3, padding='same', kernel_regularizer=keras.regularizers.l2(1e-4), name=f'res_conv1_block{block_index}')(x)
    x = L.BatchNormalization(name=f'res_bn1_block{block_index}')(x)
    x = L.ReLU(name=f'res_relu1_block{block_index}')(x)

    # Second convolution; its activation is applied after the skip addition.
    x = L.Conv2D(filters, 3, padding='same', kernel_regularizer=keras.regularizers.l2(1e-4), name=f'res_conv2_block{block_index}')(x)
    x = L.BatchNormalization(name=f'res_bn2_block{block_index}')(x)

    # Skip connection followed by the final activation.
    x = L.Add(name=f'res_add_block{block_index}')([x, residual])
    x = L.ReLU(name=f'res_relu2_block{block_index}')(x)

    return x

def create_network_SanSe(input_shape, num_resblocks, num_hidden, action_size):
    """Build the residual network with a policy output and a value output.

    Args:
        input_shape: Shape of one input example (without the batch axis).
        num_resblocks: Number of residual blocks stacked in the trunk.
        num_hidden: Number of filters of the trunk convolutions.
        action_size: Accepted for interface compatibility; not read here.

    Returns:
        A Keras model named 'Go_Network' whose outputs are the layers named
        'policy' (softmax over the flattened board) and 'value' (sigmoid).
    """
    def weight_decay():
        # A fresh L2 regularizer per layer, as in the original code.
        return keras.regularizers.l2(1e-4)

    board = L.Input(shape=input_shape, name='input_layer')

    # Stem convolution.
    trunk = L.Conv2D(num_hidden, 3, padding='same',
                     kernel_regularizer=weight_decay(), name='start_conv')(board)
    trunk = L.BatchNormalization(name='start_bn')(trunk)
    trunk = L.ReLU(name='start_relu')(trunk)

    # Residual tower.
    for block_index in range(num_resblocks):
        trunk = create_res_block(trunk, num_hidden, block_index=block_index)

    # Policy head: 32 filters, then one filter, flattened into a softmax.
    move_logits = L.Conv2D(32, 3, padding='same',
                           kernel_regularizer=weight_decay(), name='policy_conv1')(trunk)
    move_logits = L.BatchNormalization(name='policy_bn1')(move_logits)
    move_logits = L.ReLU(name='policy_relu1')(move_logits)
    move_logits = L.Conv2D(1, 1, padding='same',
                           kernel_regularizer=weight_decay(), name='policy_conv2')(move_logits)
    move_logits = L.BatchNormalization(name='policy_bn2')(move_logits)
    move_logits = L.ReLU(name='policy_relu2')(move_logits)
    move_logits = L.Flatten(name='policy_flatten')(move_logits)
    policy_out = L.Activation('softmax', name='policy')(move_logits)

    # Value head: 3 filters, global average pooling, two dense layers.
    outcome = L.Conv2D(3, 3, padding='same', name='value_conv1')(trunk)
    outcome = L.BatchNormalization(name='value_bn1')(outcome)
    outcome = L.ReLU(name='value_relu1')(outcome)
    outcome = L.GlobalAveragePooling2D()(outcome)
    outcome = L.Dense(50, activation='relu',
                      kernel_regularizer=weight_decay(), name='value_dense1')(outcome)
    value_out = L.Dense(1, activation='sigmoid',
                        kernel_regularizer=weight_decay(), name='value')(outcome)

    return tf.keras.Model(inputs=board, outputs=[policy_out, value_out], name='Go_Network')

Compilation du modèle

In [None]:
input_shape = (19, 19, planes)
num_resblocks = 4
num_hidden = filters   # number of filters (output channels) of the trunk
action_size = moves


model_ResNetSanSe = create_network_SanSe(input_shape, num_resblocks, num_hidden, action_size)
model_ResNetSanSe.summary()

epochs = 300
batch = 64

# Cosine annealing of the learning rate.
initial_learning_rate = 0.001  # can be raised (e.g. 0.01) if needed
# Keras also runs the final partial batch, so one pass over N examples takes
# ceil(N / batch) optimizer steps; -(-a // b) is integer ceiling division.
# Using N // batch made the schedule hit its floor before training ended.
steps_per_epoch = -(-N // batch)
total_steps = epochs * steps_per_epoch  # 300 * 157 = 47_100 when N = 10_000
alpha = 0.1  # schedule floor as a fraction of the initial LR: final LR = 10% of it (0.0001)

# --- Learning-rate schedule ---
lr_schedule = CosineDecay(
    initial_learning_rate,
    total_steps,
    alpha=alpha
)


model_ResNetSanSe.compile(optimizer= tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={
                'policy': ['categorical_accuracy'],
                'value': ['mse'] })

## ResNet2

In [None]:
def create_res_block(x, filters, block_index):
    """Single-convolution residual block: Conv -> Add(skip) -> ReLU -> BatchNorm.

    Note: this rebinds the name `create_res_block`, replacing any earlier
    definition with the same name in the running session.

    Args:
        x: Input feature map; it is added to the convolution output, so its
            channel count must be compatible with `filters`.
        filters: Number of filters of the 3x3 convolution.
        block_index: Index embedded in the layer names of the block.

    Returns:
        The output tensor of the block.
    """
    convolved = L.Conv2D(
        filters, 3, padding='same',
        kernel_regularizer=keras.regularizers.l2(1e-4),
        name=f'res_conv1_block{block_index}',
    )(x)
    merged = L.Add(name=f'res_add_block{block_index}')([convolved, x])
    activated = L.ReLU(name=f'res_relu2_block{block_index}')(merged)
    return L.BatchNormalization(name=f'res_bn1_block{block_index}')(activated)



def create_network_ResNet2(input_shape, num_resblocks, num_hidden, action_size):
    """Build the ResNet2 variant with a policy output and a value output.

    The stem sums a 5x5 and a 1x1 convolution of the input before the
    residual tower.

    Args:
        input_shape: Shape of one input example (without the batch axis).
        num_resblocks: Number of residual blocks stacked in the trunk.
        num_hidden: Number of filters of the stem and trunk convolutions.
        action_size: Accepted for interface compatibility; not read here.

    Returns:
        A Keras model named 'ResNet_Network' whose outputs are the layers
        named 'policy' (softmax) and 'value' (sigmoid).
    """
    def weight_decay():
        # A fresh L2 regularizer per layer, as in the original code.
        return keras.regularizers.l2(1e-4)

    board = L.Input(shape=input_shape, name='input_layer')

    # Stem: two parallel convolutions summed, then ReLU.
    wide_branch = L.Conv2D(num_hidden, 5, padding='same',
                           kernel_regularizer=weight_decay(), name='start_conv_5')(board)
    point_branch = L.Conv2D(num_hidden, 1, padding='same',
                            kernel_regularizer=weight_decay(), name='start_conv_1')(board)
    trunk = L.Add(name='input_add_layer')([wide_branch, point_branch])
    trunk = L.ReLU(name='start_relu')(trunk)

    # Residual tower.
    for block_index in range(num_resblocks):
        trunk = create_res_block(trunk, num_hidden, block_index=block_index)

    # Policy head: 32 filters, then one filter, flattened into a softmax.
    move_logits = L.Conv2D(32, 3, padding='same',
                           kernel_regularizer=weight_decay(), name='policy_conv1')(trunk)
    move_logits = L.BatchNormalization(name='policy_bn1')(move_logits)
    move_logits = L.ReLU(name='policy_relu1')(move_logits)
    move_logits = L.Conv2D(1, 1, padding='same',
                           kernel_regularizer=weight_decay(), name='policy_conv2')(move_logits)
    move_logits = L.BatchNormalization(name='policy_bn2')(move_logits)
    move_logits = L.ReLU(name='policy_relu2')(move_logits)
    move_logits = L.Flatten(name='policy_flatten')(move_logits)
    policy_out = L.Activation('softmax', name='policy')(move_logits)

    # Value head: 3 filters, flattened straight into a single sigmoid unit.
    outcome = L.Conv2D(3, 3, padding='same', name='value_conv1')(trunk)
    outcome = L.BatchNormalization(name='value_bn1')(outcome)
    outcome = L.ReLU(name='value_relu1')(outcome)
    outcome = L.Flatten(name='value_flatten')(outcome)
    value_out = L.Dense(1, activation='sigmoid',
                        kernel_regularizer=weight_decay(), name='value')(outcome)

    return tf.keras.Model(inputs=board, outputs=[policy_out, value_out], name='ResNet_Network')

Computation

In [None]:
model_ResNet2 = create_network_ResNet2(input_shape, num_resblocks, num_hidden, action_size)
model_ResNet2.summary()

epochs = 300
batch = 64

# Cosine annealing of the learning rate.
initial_learning_rate = 0.001  # can be raised (e.g. 0.01) if needed
# Keras also runs the final partial batch, so one pass over N examples takes
# ceil(N / batch) optimizer steps; -(-a // b) is integer ceiling division.
# Using N // batch made the schedule hit its floor before training ended.
steps_per_epoch = -(-N // batch)
total_steps = epochs * steps_per_epoch  # 300 * 157 = 47_100 when N = 10_000
alpha = 0.1  # schedule floor as a fraction of the initial LR: final LR = 10% of it (0.0001)

# --- Learning-rate schedule ---
lr_schedule = CosineDecay(
    initial_learning_rate,
    total_steps,
    alpha=alpha
)


model_ResNet2.compile(optimizer= tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={
        'policy': ['categorical_accuracy'],
        'value': ['mse'] })

## Training
Ici, remplacer `model` par le nom du modèle que l'on souhaite entraîner (par exemple `model = model_ResNetSanSe`).

In [None]:
# CSV file receiving the per-epoch losses and validation metrics.
csv_filename = "save_metrics.csv"

# Per-epoch metric histories (one entry appended per epoch).
policy_losses, value_losses = [], []
val_policy_accuracy, val_value_mse = [], []

# Training loop: one call to model.fit per epoch on a freshly filled buffer.
for i in range(1, epochs + 1):
    print(f"Epoch {i}")

    # Let golois refill the training buffers for this epoch.
    golois.getBatch(input_data, policy, value, end, groups, i * N)

    history = model.fit(
        input_data,
        {'policy': policy, 'value': value},
        epochs=1,
        batch_size=batch,
    )

    # When running locally, uncomment the lines below and comment out the
    # model.fit call above.
    #dataset = tf.data.Dataset.from_tensor_slices((input_data, {'policy': policy, 'value': value}))
    #dataset = dataset.shuffle(N).batch(batch).prefetch(tf.data.AUTOTUNE)
    #history = model.fit(dataset, epochs=1, callbacks=[early_stopping])

    epoch_metrics = history.history
    policy_losses.append(epoch_metrics['policy_loss'][0])
    value_losses.append(epoch_metrics['value_loss'][0])

    # Force a garbage collection every 5 epochs.
    if i % 5 == 0:
        gc.collect()

    if i % 20 != 0:
        # No validation this epoch: record 0 as a placeholder so that every
        # history keeps one entry per epoch.
        val_policy_accuracy.append(0)  # categorical_accuracy
        val_value_mse.append(0)  # mse
    else:
        # Every 20 epochs the buffers are overwritten with validation data
        # and the model is evaluated on them.
        golois.getValidation(input_data, policy, value, end)
        val = model.evaluate(input_data, [policy, value], verbose=0, batch_size=batch)

        val_policy_accuracy.append(val[3])  # categorical_accuracy
        val_value_mse.append(val[4])  # mse

        log_message = f"Validation at epoch {i}: {val}"
        print(log_message)


# Write all collected metrics to the CSV file once training is over.
with open(csv_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Epoch", "Policy Loss", "Value Loss", "Validation Policy Accuracy", "Validation Value MSE"])
    for epoch, row in enumerate(zip(policy_losses, value_losses, val_policy_accuracy, val_value_mse)):
        writer.writerow([epoch + 1, *row])

# Save the trained model.
model_filename = "model.h5"
model.save(model_filename)
print(f"Modèle sauvegardé sous {model_filename}")

# PARTIE SUR EFFICIENT FORMER

## Modèle EfficientFormerRough

In [None]:
def conv_stem(x, embed_dim):
    """Convolutional stem: 3x3 convolution (no bias), batch norm, ReLU.

    Args:
        x: Input tensor.
        embed_dim: Number of filters of the convolution.

    Returns:
        The stem output tensor.
    """
    stem_layers = (
        L.Conv2D(embed_dim, 3, padding='same', use_bias=False),
        L.BatchNormalization(),
        L.ReLU(),
    )
    for layer in stem_layers:
        x = layer(x)
    return x

def mb4d(x, dim, expansion=4):
    """MB4D-inspired block: 1x1 expand, 3x3 depthwise, 1x1 project, skip-add.

    Args:
        x: Input feature map; it is added to the block output, so it must be
            compatible with `dim` channels.
        dim: Number of output channels of the projection.
        expansion: Multiplier applied to `dim` for the expanded width.

    Returns:
        The sum of the projected features and the input.
    """
    expanded_width = dim * expansion
    body = (
        # Pointwise expansion.
        L.Conv2D(expanded_width, 1, padding='same', use_bias=False),
        L.BatchNormalization(),
        L.ReLU(),
        # Depthwise spatial mixing.
        L.DepthwiseConv2D(3, padding='same', use_bias=False),
        L.BatchNormalization(),
        L.ReLU(),
        # Pointwise projection back to `dim` channels (no activation).
        L.Conv2D(dim, 1, padding='same', use_bias=False),
        L.BatchNormalization(),
    )
    features = x
    for layer in body:
        features = layer(features)
    return L.add([features, x])

def mb3d(x, dim):
    """MB3D-inspired block built around multi-head self-attention.

    The feature map is flattened to a token sequence, passed through
    self-attention with a residual connection, layer-normalised, projected
    with a dense layer and reshaped back to a 4-D map.

    Args:
        x: 4-D input tensor (its four axes are read via tf.shape).
        dim: Width of the dense projection and last axis of the output;
            the attention uses 2 heads of size dim // 2.

    Returns:
        A tensor reshaped to [batch, height, width, dim].
    """
    # Dynamic sizes of the four input axes.
    batch = tf.shape(x)[0]
    height = tf.shape(x)[1]
    width = tf.shape(x)[2]
    channels = tf.shape(x)[3]

    # Flatten the two spatial axes into one token axis.
    tokens = tf.reshape(x, [batch, height * width, channels])

    attention = L.MultiHeadAttention(num_heads=2, key_dim=dim // 2)
    attended = attention(tokens, tokens)
    normed = L.LayerNormalization(epsilon=1e-6)(attended + tokens)
    projected = L.Dense(dim)(normed)

    # Back to a 4-D feature map for the layers that follow.
    return tf.reshape(projected, [batch, height, width, dim])

def build_fixed_model_Rough(input_shape=(19, 19, 31)):
    """Build the 'Rough' EfficientFormer-style model with two outputs.

    Layout: conv stem (32) -> 2 x mb4d (32) -> 3x3 transition conv (56)
    -> 2 x mb3d (56) -> policy head and value head.

    Args:
        input_shape: Shape of one input example (without the batch axis).

    Returns:
        A Keras model whose outputs are the layers named 'policy' (softmax)
        and 'value' (sigmoid).
    """
    board = L.Input(shape=input_shape)

    features = conv_stem(board, 32)

    # Two convolutional MB4D blocks.
    for _ in range(2):
        features = mb4d(features, 32, expansion=4)

    # Transition to the wider attention stage.
    features = L.Conv2D(56, 3, padding='same')(features)
    features = L.BatchNormalization()(features)
    features = L.ReLU()(features)

    # Two attention MB3D blocks.
    for _ in range(2):
        features = mb3d(features, 56)

    # Policy head: two 4-filter convolutions, then one ReLU-activated filter
    # flattened into a softmax.
    move_scores = features
    for _ in range(2):
        move_scores = L.Conv2D(4, 3, padding='same', use_bias=False)(move_scores)
        move_scores = L.BatchNormalization()(move_scores)
        move_scores = L.ReLU()(move_scores)
    move_scores = L.Conv2D(
        1, 1, activation='relu', padding='same', use_bias=False,
        kernel_regularizer=regularizers.l2(0.0001),
    )(move_scores)
    move_scores = L.Flatten()(move_scores)
    policy_out = L.Activation('softmax', name='policy')(move_scores)

    # Value head: 32-filter convolution, swish, global pooling, two dense layers.
    outcome = L.Conv2D(filters=32, kernel_size=3, strides=1, padding='same', use_bias=False)(features)
    outcome = L.BatchNormalization()(outcome)
    outcome = L.Activation("swish")(outcome)
    outcome = L.GlobalAveragePooling2D()(outcome)
    outcome = L.Dense(50, activation='relu',
                      kernel_regularizer=regularizers.l2(0.0001))(outcome)
    value_out = L.Dense(1, activation='sigmoid', name='value',
                        kernel_regularizer=regularizers.l2(0.0001))(outcome)

    return keras.Model(inputs=board, outputs=[policy_out, value_out])

Computation

In [None]:
model_EFRough = build_fixed_model_Rough()
model_EFRough.summary()

epochs = 1000
batch = 64

# Cosine annealing of the learning rate.
initial_learning_rate = 0.001  # can be raised (e.g. 0.01) if needed
# Keras also runs the final partial batch, so one pass over N examples takes
# ceil(N / batch) optimizer steps; -(-a // b) is integer ceiling division.
# Using N // batch made the schedule hit its floor before training ended.
steps_per_epoch = -(-N // batch)
total_steps = epochs * steps_per_epoch  # 1000 * 157 = 157_000 when N = 10_000
alpha = 0.1  # schedule floor as a fraction of the initial LR: final LR = 10% of it (0.0001)

# --- Learning-rate schedule ---
lr_schedule = CosineDecay(
    initial_learning_rate,
    total_steps,
    alpha=alpha
)


model_EFRough.compile(optimizer= tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={
        'policy': ['categorical_accuracy'],
        'value': ['mse'] })

## Modèle EfficientFormerWay

In [None]:
def conv_stem(x, out_channels):
    """Stem: 3x3 'same' convolution with default stride (no bias), BN, ReLU.

    'same' padding with the default stride keeps the spatial resolution of
    the input unchanged.

    Args:
        x: Input tensor.
        out_channels: Number of filters of the convolution.

    Returns:
        The stem output tensor.
    """
    convolution = L.Conv2D(
        out_channels,
        kernel_size=3,
        padding='same',
        kernel_regularizer=keras.regularizers.l2(1e-4),
        use_bias=False,
        name='stem_conv_3x3',
    )
    normalised = L.BatchNormalization(name='stem_bn')(convolution(x))
    return L.ReLU(name='stem_relu')(normalised)


def mb4d(x, dim, expansion, block_id):
    """MB4D block: average-pool token mixing followed by a 1x1-conv MLP.

    Args:
        x: Input feature map; it is added to the block output, so it must be
            compatible with `dim` channels.
        dim: Number of output channels of the reducing convolution.
        expansion: Multiplier applied to `dim` inside the MLP.
        block_id: Index embedded in every layer name of the block.

    Returns:
        ReLU(x + MLP(x + avgpool(x))).
    """
    prefix = f'mb4d_block{block_id}'

    # Local token mixing: 3x3 average pooling added back onto the input.
    pooled = L.AveragePooling2D(pool_size=3, strides=1, padding='same',
                                name=f'{prefix}_avgpool')(x)
    mixed = L.Add(name=f'{prefix}_local_mix')([pooled, x])

    # Convolutional MLP: widen to dim * expansion ...
    hidden = L.Conv2D(dim * expansion, kernel_size=1, padding='same', use_bias=False,
                      name=f'{prefix}_expand_conv')(mixed)
    hidden = L.BatchNormalization(name=f'{prefix}_expand_bn')(hidden)
    hidden = L.ReLU(name=f'{prefix}_expand_relu')(hidden)

    # ... then narrow back to dim channels.
    reduced = L.Conv2D(dim, kernel_size=1, padding='same', use_bias=False,
                       name=f'{prefix}_reduce_conv')(hidden)
    reduced = L.BatchNormalization(name=f'{prefix}_reduce_bn')(reduced)

    # Residual connection around the whole block, then the output activation.
    summed = L.Add(name=f'{prefix}_skip')([x, reduced])
    return L.ReLU(name=f'{prefix}_out')(summed)


def mb3d(x, dim, num_heads, mlp_expansion, block_id):
    """MB3D block: pre-norm self-attention and pre-norm MLP, each with a skip.

    Args:
        x: 4-D input tensor (its four axes are read via tf.shape).
        dim: Width of the MLP output and last axis of the result; each
            attention head has size dim // num_heads.
        num_heads: Number of attention heads.
        mlp_expansion: Multiplier applied to `dim` for the MLP hidden width
            (the product is truncated to an int).
        block_id: Index embedded in every layer name of the block.

    Returns:
        A tensor reshaped to [batch, height, width, dim].
    """
    # The original kept an unused `shortcut = x` local; both skip connections
    # below work on the flattened tokens, so it has been removed.

    # Dynamic sizes of the four input axes.
    b = tf.shape(x)[0]
    h = tf.shape(x)[1]
    w = tf.shape(x)[2]
    c = tf.shape(x)[3]

    # Flatten the two spatial axes into one token axis.
    x_reshaped = tf.reshape(x, [b, h*w, c])

    # LayerNorm + multi-head self-attention.
    x_ln = L.LayerNormalization(name=f'mb3d_block{block_id}_ln_attn')(x_reshaped)
    attn_out = L.MultiHeadAttention(num_heads=num_heads, key_dim=dim // num_heads,
                                    dropout=0.0, name=f'mb3d_block{block_id}_mhsa')(x_ln, x_ln)

    # Skip connection around the attention.
    x = L.Add(name=f'mb3d_block{block_id}_skip_attn')([attn_out, x_reshaped])

    # MLP: Dense + GELU + Dense.
    x_ln2 = L.LayerNormalization(name=f'mb3d_block{block_id}_ln_ffn')(x)
    x_mlp = L.Dense(int(dim * mlp_expansion), use_bias=False, kernel_regularizer=regularizers.l2(1e-4), name=f'mb3d_block{block_id}_ffn_expand')(x_ln2)
    x_mlp = L.Activation('gelu', name=f'mb3d_block{block_id}_ffn_gelu')(x_mlp)
    x_mlp = L.Dense(dim, use_bias=False, kernel_regularizer=regularizers.l2(1e-4),name=f'mb3d_block{block_id}_ffn_reduce')(x_mlp)

    # Skip connection around the MLP.
    x = L.Add(name=f'mb3d_block{block_id}_skip_ffn')([x, x_mlp])

    # Back to a 4-D feature map so that blocks can be chained.
    x = tf.reshape(x, [b, h, w, dim], name=f'mb3d_block{block_id}_out')

    return x



def build_efficientformer_go_Way(input_shape=(19, 19, 31),stem_dim=32, blocks_4d=3, blocks_3d=2, trans_dim=48, num_heads=3, conv_expansion=4, trans_expansion=1.5):
    """Build the 'Way' EfficientFormer-style model with two outputs.

    Layout: conv stem -> `blocks_4d` mb4d blocks -> 3x3 transition conv
    -> `blocks_3d` mb3d blocks -> policy head and value head.

    Args:
        input_shape: Shape of one input example (without the batch axis).
        stem_dim: Channel count of the stem and of the mb4d blocks.
        blocks_4d: Number of mb4d blocks.
        blocks_3d: Number of mb3d blocks.
        trans_dim: Channel count of the transition conv and mb3d blocks.
        num_heads: Number of attention heads in each mb3d block.
        conv_expansion: Expansion factor of the mb4d blocks.
        trans_expansion: MLP expansion factor of the mb3d blocks.

    Returns:
        A Keras model named 'efficientformer_go' whose outputs are the
        layers named 'policy' (softmax) and 'value' (sigmoid).
    """
    board = L.Input(shape=input_shape)

    # Initial embedding.
    features = conv_stem(board, stem_dim)

    # Convolutional stage.
    for block_id in range(blocks_4d):
        features = mb4d(features, dim=stem_dim, expansion=conv_expansion, block_id=block_id)

    # Transition to the width used by the attention stage.
    features = L.Conv2D(trans_dim, kernel_size=3, padding='same', use_bias=False,
                        kernel_regularizer=regularizers.l2(1e-4))(features)
    features = L.BatchNormalization()(features)
    features = L.ReLU()(features)

    # Attention stage.
    for block_id in range(blocks_3d):
        features = mb3d(features, dim=trans_dim, num_heads=num_heads,
                        mlp_expansion=trans_expansion, block_id=block_id)

    # Policy head: two 4-filter convolutions, then one filter flattened
    # into a softmax.
    move_logits = features
    for _ in range(2):
        move_logits = L.Conv2D(4, 3, padding='same', use_bias=False,
                               kernel_regularizer=regularizers.l2(1e-4))(move_logits)
        move_logits = L.BatchNormalization()(move_logits)
        move_logits = L.ReLU()(move_logits)
    move_logits = L.Conv2D(filters=1, kernel_size=1, padding='same', use_bias=False,
                           kernel_regularizer=regularizers.l2(1e-4))(move_logits)
    move_logits = L.Flatten()(move_logits)
    policy_out = L.Activation('softmax', name='policy')(move_logits)

    # Value head: 32-filter convolution, swish, global pooling, two dense layers.
    outcome = L.Conv2D(filters=32, kernel_size=3, strides=1, padding='same', use_bias=False,
                       kernel_regularizer=regularizers.l2(0.0001))(features)
    outcome = L.BatchNormalization()(outcome)
    outcome = L.Activation("swish")(outcome)
    outcome = L.GlobalAveragePooling2D()(outcome)
    outcome = L.Dense(42, activation='relu',
                      kernel_regularizer=regularizers.l2(0.0001))(outcome)
    value_out = L.Dense(1, activation='sigmoid', name='value',
                        kernel_regularizer=regularizers.l2(0.0001))(outcome)

    return keras.Model(inputs=board, outputs=[policy_out, value_out], name='efficientformer_go')

Computation

In [None]:
model_EFWay = build_efficientformer_go_Way()
model_EFWay.summary()



epochs = 1000
batch = 64

# Cosine annealing of the learning rate.
initial_learning_rate = 0.001  # can be raised (e.g. 0.01) if needed
# Keras also runs the final partial batch, so one pass over N examples takes
# ceil(N / batch) optimizer steps; -(-a // b) is integer ceiling division.
# Using N // batch made the schedule hit its floor before training ended.
steps_per_epoch = -(-N // batch)
total_steps = epochs * steps_per_epoch  # 1000 * 157 = 157_000 when N = 10_000
alpha = 0.1  # schedule floor as a fraction of the initial LR: final LR = 10% of it (0.001 * 0.1 = 0.0001)

# --- Learning-rate schedule ---
lr_schedule = CosineDecay(
    initial_learning_rate,
    total_steps,
    alpha=alpha
)


model_EFWay.compile(optimizer= tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={
        'policy': ['categorical_accuracy'],
        'value': ['mse'] })


## Modèle GOat

In [None]:
def conv_stem(x, out_channels):
    """Stem: 3x3 'same' convolution (no bias), batch norm, swish.

    The activation layer keeps the name 'stem_relu' although it applies
    swish.

    Args:
        x: Input tensor.
        out_channels: Number of filters of the convolution.

    Returns:
        The stem output tensor.
    """
    convolution = L.Conv2D(
        out_channels,
        kernel_size=3,
        padding='same',
        kernel_regularizer=keras.regularizers.l2(1e-4),
        use_bias=False,
        name='stem_conv_3x3',
    )
    normalised = L.BatchNormalization(name='stem_bn')(convolution(x))
    return L.Activation('swish', name='stem_relu')(normalised)


def mixconv2d(x, kernels=(3, 5), name="mixconv"):
    """Mixed depthwise convolution: one kernel size per channel group.

    The channels of `x` are split into len(kernels) equal groups; each group
    goes through its own depthwise convolution and the results are
    concatenated along the channel axis. If the channel count is not
    divisible by len(kernels), the trailing channels are not used.

    Args:
        x: Input feature map with a statically known channel count.
        kernels: Depthwise kernel size of each group, in channel order.
        name: Prefix of the depthwise and concatenate layer names.

    Returns:
        The concatenation of the per-group depthwise outputs.
    """
    input_channels = x.shape[-1]
    channels_per_group = input_channels // len(kernels)

    group_outputs = []
    for i, k in enumerate(kernels):
        start = i * channels_per_group
        stop = start + channels_per_group

        # Bind the slice bounds as default arguments. A Lambda layer calls
        # its function again whenever the model is re-traced (fit, predict),
        # i.e. after this loop has finished; a closure over `i` would then
        # make every group slice the LAST channel group.
        x_group = L.Lambda(
            lambda z, start=start, stop=stop: z[:, :, :, start:stop]
        )(x)

        # Depthwise convolution with this group's kernel size.
        x_group = L.DepthwiseConv2D(
            kernel_size=k,
            strides=1,
            padding='same',
            use_bias=False,
            kernel_regularizer=regularizers.l2(1e-4),
            name=f"{name}_dw{k}_{i}"
        )(x_group)

        group_outputs.append(x_group)

    # Stitch the groups back together along the channel axis.
    out = L.Concatenate(axis=-1, name=f"{name}_concat")(group_outputs)
    return out


def mb4d(x, dim, expansion, block_id):
    """MB4D block: mixed depthwise-conv token mixing and a 1x1-conv MLP.

    Args:
        x: Input feature map; it is added to the block output, so it must be
            compatible with `dim` channels.
        dim: Number of output channels of the reducing convolution.
        expansion: Multiplier applied to `dim` inside the MLP.
        block_id: Index embedded in every layer name of the block.

    Returns:
        swish(x + MLP(x + mixconv(x))).
    """
    prefix = f'mb4d_block{block_id}'

    # Token mixing: 3x3 / 5x5 mixed depthwise convolution added onto the input.
    mixed_conv = mixconv2d(x, kernels=[3,5], name=f'{prefix}_mixconv')
    mixed = L.Add(name=f'{prefix}_local_mix')([mixed_conv, x])

    # Convolutional MLP: widen to dim * expansion ...
    hidden = L.Conv2D(dim * expansion, kernel_size=1, padding='same', use_bias=False,
                      name=f'{prefix}_expand_conv')(mixed)
    hidden = L.BatchNormalization(name=f'{prefix}_expand_bn')(hidden)
    hidden = L.Activation('swish', name=f'{prefix}_expand_swish')(hidden)

    # ... then narrow back to dim channels.
    reduced = L.Conv2D(dim, kernel_size=1, padding='same', use_bias=False,
                       name=f'{prefix}_reduce_conv')(hidden)
    reduced = L.BatchNormalization(name=f'{prefix}_reduce_bn')(reduced)

    # Residual connection around the whole block, then the output activation.
    summed = L.Add(name=f'{prefix}_skip')([x, reduced])
    return L.Activation('swish', name=f'{prefix}_out')(summed)


def mb3d(x, dim, num_heads, mlp_expansion, block_id):
    """MB3D block: pre-norm self-attention and pre-norm MLP, each with a skip.

    Args:
        x: 4-D input tensor (its four axes are read via tf.shape).
        dim: Width of the MLP output and last axis of the result; each
            attention head has size dim // num_heads.
        num_heads: Number of attention heads.
        mlp_expansion: Multiplier applied to `dim` for the MLP hidden width
            (the product is truncated to an int).
        block_id: Index embedded in every layer name of the block.

    Returns:
        A tensor reshaped to [batch, height, width, dim].
    """
    # The original kept an unused `shortcut = x` local; both skip connections
    # below work on the flattened tokens, so it has been removed.

    # Dynamic sizes of the four input axes.
    b = tf.shape(x)[0]
    h = tf.shape(x)[1]
    w = tf.shape(x)[2]
    c = tf.shape(x)[3]

    # Flatten the two spatial axes into one token axis.
    x_reshaped = tf.reshape(x, [b, h*w, c])

    # LayerNorm + multi-head self-attention.
    x_ln = L.LayerNormalization(name=f'mb3d_block{block_id}_ln_attn')(x_reshaped)
    attn_out = L.MultiHeadAttention(num_heads=num_heads, key_dim=dim // num_heads,
                                    dropout=0.0, name=f'mb3d_block{block_id}_mhsa')(x_ln, x_ln)

    # Skip connection around the attention.
    x = L.Add(name=f'mb3d_block{block_id}_skip_attn')([attn_out, x_reshaped])

    # MLP: Dense + GELU + Dense.
    x_ln2 = L.LayerNormalization(name=f'mb3d_block{block_id}_ln_ffn')(x)
    x_mlp = L.Dense(int(dim * mlp_expansion), use_bias=False, kernel_regularizer=regularizers.l2(1e-4), name=f'mb3d_block{block_id}_ffn_expand')(x_ln2)
    x_mlp = L.Activation('gelu', name=f'mb3d_block{block_id}_ffn_gelu')(x_mlp)
    x_mlp = L.Dense(dim, use_bias=False, kernel_regularizer=regularizers.l2(1e-4),name=f'mb3d_block{block_id}_ffn_reduce')(x_mlp)

    # Skip connection around the MLP.
    x = L.Add(name=f'mb3d_block{block_id}_skip_ffn')([x, x_mlp])

    # Back to a 4-D feature map so that blocks can be chained.
    x = tf.reshape(x, [b, h, w, dim], name=f'mb3d_block{block_id}_out')

    return x



def build_efficientformer_GOat(input_shape=(19, 19, 31),stem_dim=32, blocks_4d=3, blocks_3d=2, trans_dim=48, num_heads=3, conv_expansion=4, trans_expansion=1.5):
    """Build the 'GOat' EfficientFormer-style model with two outputs.

    Layout: conv stem -> three mb4d blocks (the third with expansion
    conv_expansion - 1) -> 3x3 transition conv -> `blocks_3d` mb3d blocks
    -> policy head and value head. Swish is used throughout.

    Args:
        input_shape: Shape of one input example (without the batch axis).
        stem_dim: Channel count of the stem and of the mb4d blocks.
        blocks_4d: Accepted for interface compatibility; not read here (the
            number of mb4d blocks is fixed at three).
        blocks_3d: Number of mb3d blocks.
        trans_dim: Channel count of the transition conv and mb3d blocks.
        num_heads: Number of attention heads in each mb3d block.
        conv_expansion: Expansion factor of the first two mb4d blocks.
        trans_expansion: MLP expansion factor of the mb3d blocks.

    Returns:
        A Keras model named 'efficientformer_go' whose outputs are the
        layers named 'policy' (softmax) and 'value' (sigmoid).
    """
    board = L.Input(shape=input_shape)

    # Initial embedding: conv stem.
    features = conv_stem(board, stem_dim)

    # Convolutional stage: the last block uses a smaller expansion.
    block_expansions = (conv_expansion, conv_expansion, conv_expansion - 1)
    for block_id, block_expansion in enumerate(block_expansions):
        features = mb4d(features, dim=stem_dim, expansion=block_expansion, block_id=block_id)

    # Transition to the width used by the attention stage.
    features = L.Conv2D(trans_dim, kernel_size=3, padding='same', use_bias=False,
                        kernel_regularizer=regularizers.l2(1e-4))(features)
    features = L.BatchNormalization()(features)
    features = L.Activation('swish')(features)

    # Attention stage.
    for block_id in range(blocks_3d):
        features = mb3d(features, dim=trans_dim, num_heads=num_heads,
                        mlp_expansion=trans_expansion, block_id=block_id)

    # Policy head: two 4-filter convolutions, then one filter flattened
    # into a softmax.
    move_logits = features
    for _ in range(2):
        move_logits = L.Conv2D(4, 3, padding='same', use_bias=False,
                               kernel_regularizer=regularizers.l2(1e-4))(move_logits)
        move_logits = L.BatchNormalization()(move_logits)
        move_logits = L.Activation('swish')(move_logits)
    move_logits = L.Conv2D(filters=1, kernel_size=1, padding='same', use_bias=False,
                           kernel_regularizer=regularizers.l2(1e-4))(move_logits)
    move_logits = L.Flatten()(move_logits)
    policy_out = L.Activation('softmax', name='policy')(move_logits)

    # Value head: 32-filter convolution, swish, global pooling, two dense layers.
    outcome = L.Conv2D(filters=32, kernel_size=3, strides=1, padding='same', use_bias=False,
                       kernel_regularizer=regularizers.l2(0.0001))(features)
    outcome = L.BatchNormalization()(outcome)
    outcome = L.Activation("swish")(outcome)
    outcome = L.GlobalAveragePooling2D()(outcome)
    outcome = L.Dense(42, activation='swish',
                      kernel_regularizer=regularizers.l2(0.0001))(outcome)
    value_out = L.Dense(1, activation='sigmoid', name='value',
                        kernel_regularizer=regularizers.l2(0.0001))(outcome)

    return keras.Model(inputs=board, outputs=[policy_out, value_out], name='efficientformer_go')

Computation

In [None]:
# On construit le modèle et on regarde le résumé
model_GOat = build_efficientformer_GOat()
model_GOat.summary()



epochs = 900
batch = 64

# Cosine annealing of the learning rate.
initial_learning_rate = 0.001  # can be raised (e.g. 0.01) if needed
# Keras also runs the final partial batch, so one pass over N examples takes
# ceil(N / batch) optimizer steps; -(-a // b) is integer ceiling division.
# Using N // batch made the schedule hit its floor before training ended.
steps_per_epoch = -(-N // batch)
total_steps = epochs * steps_per_epoch  # 900 * 157 = 141_300 when N = 10_000
alpha = 0.1  # schedule floor as a fraction of the initial LR: final LR = 10% of it (0.001 * 0.1 = 0.0001)

# --- Learning-rate schedule ---
lr_schedule = CosineDecay(
    initial_learning_rate,
    total_steps,
    alpha=alpha
)


model_GOat.compile(optimizer= tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={
        'policy': ['categorical_accuracy'],
        'value': ['mse'] })

## Training
Ici, remplacer `model` par le nom du modèle que l'on souhaite entraîner (par exemple `model = model_GOat`).

In [None]:
# CSV file receiving the per-epoch losses and validation metrics.
csv_filename = "save_metrics.csv"

# Per-epoch metric histories (one entry appended per epoch).
policy_losses, value_losses = [], []
val_policy_accuracy, val_value_mse = [], []

# Training loop: one call to model.fit per epoch on a freshly filled buffer.
for i in range(1, epochs + 1):
    print(f"Epoch {i}")

    # Let golois refill the training buffers for this epoch.
    golois.getBatch(input_data, policy, value, end, groups, i * N)

    history = model.fit(
        input_data,
        {'policy': policy, 'value': value},
        epochs=1,
        batch_size=batch,
    )

    # When running locally, uncomment the lines below and comment out the
    # model.fit call above.
    #dataset = tf.data.Dataset.from_tensor_slices((input_data, {'policy': policy, 'value': value}))
    #dataset = dataset.shuffle(N).batch(batch).prefetch(tf.data.AUTOTUNE)
    #history = model.fit(dataset, epochs=1, callbacks=[early_stopping])

    epoch_metrics = history.history
    policy_losses.append(epoch_metrics['policy_loss'][0])
    value_losses.append(epoch_metrics['value_loss'][0])

    # Force a garbage collection every 5 epochs.
    if i % 5 == 0:
        gc.collect()

    if i % 20 != 0:
        # No validation this epoch: record 0 as a placeholder so that every
        # history keeps one entry per epoch.
        val_policy_accuracy.append(0)  # categorical_accuracy
        val_value_mse.append(0)  # mse
    else:
        # Every 20 epochs the buffers are overwritten with validation data
        # and the model is evaluated on them.
        golois.getValidation(input_data, policy, value, end)
        val = model.evaluate(input_data, [policy, value], verbose=0, batch_size=batch)

        val_policy_accuracy.append(val[3])  # categorical_accuracy
        val_value_mse.append(val[4])  # mse

        log_message = f"Validation at epoch {i}: {val}"
        print(log_message)


# Write all collected metrics to the CSV file once training is over.
with open(csv_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Epoch", "Policy Loss", "Value Loss", "Validation Policy Accuracy", "Validation Value MSE"])
    for epoch, row in enumerate(zip(policy_losses, value_losses, val_policy_accuracy, val_value_mse)):
        writer.writerow([epoch + 1, *row])

# Save the trained model.
model_filename = "model.h5"
model.save(model_filename)
print(f"Modèle sauvegardé sous {model_filename}")

# PARTIE SUR MOBILENETSV3

## Modèle MobileNet3_A

In [None]:
# Activation HardSwish
def h_swish(x):
    """Hard-swish activation: x * relu6(x + 3) / 6."""
    gate = tf.nn.relu6(x + 3)
    return x * gate / 6

# Activation HardSigmoid
def h_sigmoid(x):
    """Hard-sigmoid activation: relu6(x + 3) / 6."""
    shifted = x + 3
    return tf.nn.relu6(shifted) / 6


# Squeeze-and-Excite function
def squeeze_excite(x, se_ratio=4):
    """Squeeze-and-excite: rescale the channels of `x` by a learned gate.

    Args:
        x: Input feature map with a statically known channel count.
        se_ratio: Reduction factor of the bottleneck (channels // se_ratio).

    Returns:
        `x` multiplied element-wise by the hard-sigmoid gate.
    """
    channels = x.shape[-1]
    squeezed_channels = channels // se_ratio

    # Squeeze: global average pooling, reshaped to a 1x1 feature map.
    gate = L.GlobalAveragePooling2D()(x)
    gate = L.Reshape((1, 1, channels))(gate)

    # Bottleneck projection (1x1 convolution) down to the reduced width.
    gate = L.Conv2D(squeezed_channels, kernel_size=1, padding='same',
                    kernel_regularizer=keras.regularizers.l2(1e-4), use_bias=False)(gate)
    gate = L.BatchNormalization()(gate)
    gate = L.ReLU()(gate)

    # Expansion back to the input channel count, squashed by hard-sigmoid.
    gate = L.Conv2D(channels, kernel_size=1, padding='same',
                    kernel_regularizer=keras.regularizers.l2(1e-4), use_bias=False)(gate)
    gate = L.BatchNormalization()(gate)
    gate = h_sigmoid(gate)

    # Excite: element-wise rescaling of the input.
    return L.Multiply()([x, gate])



def mixconv2d(x, kernels=(3, 5), name="mixconv"):
    """Mixed depthwise convolution: one kernel size per group of channels.

    The channels of ``x`` are split into ``len(kernels)`` contiguous groups,
    each group goes through its own stride-1 ``DepthwiseConv2D`` with the
    matching kernel size, and the results are concatenated back along the
    channel axis.

    Args:
        x: input feature map; its last dimension is the channel count.
        kernels: kernel size for each channel group.
        name: accepted for backward compatibility; currently unused.

    Returns:
        The concatenation of the per-group depthwise convolutions, with the
        same number of channels as ``x``.
    """
    input_channels = x.shape[-1]
    num_groups = len(kernels)
    channels_per_group = input_channels // num_groups

    group_outputs = []
    for i, k in enumerate(kernels):
        start = i * channels_per_group
        # The last group also takes the remainder channels so that none are
        # dropped when the channel count is not a multiple of the group count.
        stop = input_channels if i == num_groups - 1 else start + channels_per_group

        # Bind the bounds as default arguments: Keras re-runs the lambda every
        # time the model is called, and a plain closure would then see the
        # final loop values, making every group slice the same channels.
        x_group = L.Lambda(
            lambda z, start=start, stop=stop: z[:, :, :, start:stop]
        )(x)

        x_group = L.DepthwiseConv2D(
            kernel_size=k,
            strides=1,
            padding='same',
            use_bias=False,
            kernel_regularizer=regularizers.l2(1e-4)
        )(x_group)

        group_outputs.append(x_group)

    return L.Concatenate(axis=-1)(group_outputs)


def bottleneck(x, kernel, stride, expansion, output_channels, activation, se=False):
    """MobileNetV3-style bottleneck with a mixed depthwise convolution.

    Pipeline: 1x1 expansion -> mixed depthwise conv (kernels 3 and 5) ->
    optional Squeeze-and-Excite -> 1x1 projection, each followed by batch
    normalization and ``activation``; a residual connection is added at the
    end.

    Args:
        x: input feature map.
        kernel: unused; the depthwise kernels are fixed to 3 and 5.
        stride: only applied to the shortcut projection.
        expansion: number of channels after the expansion convolution.
        output_channels: number of channels produced by the block.
        activation: callable applied after each batch normalization.
        se: whether to insert a Squeeze-and-Excite step.

    Returns:
        The sum of the transformed features and the shortcut.
    """
    # NOTE(review): the main path is never strided, so stride != 1 looks like
    # it would yield mismatched shapes at the Add -- confirm before using it.

    def pointwise(n_filters, **extra):
        # Bias-free, L2-regularized 1x1 convolution.
        return L.Conv2D(
            n_filters,
            kernel_size=1,
            padding='same',
            kernel_regularizer=keras.regularizers.l2(1e-4),
            use_bias=False,
            **extra,
        )

    shortcut = x  # kept for the residual connection

    # Expansion phase.
    out = pointwise(expansion)(x)
    out = L.BatchNormalization()(out)
    out = activation(out)

    # Depthwise phase.
    out = mixconv2d(out, kernels=[3, 5])
    out = L.BatchNormalization()(out)
    out = activation(out)

    # Optional channel re-weighting.
    if se:
        out = squeeze_excite(out, se_ratio=4)

    # Pointwise projection.
    out = pointwise(output_channels)(out)
    out = L.BatchNormalization()(out)
    out = activation(out)

    # The identity shortcut is only usable when the stride is 1 and the
    # channel counts already match; otherwise project it with a 1x1 conv.
    if stride != 1 or shortcut.shape[-1] != output_channels:
        shortcut = pointwise(output_channels, strides=stride)(shortcut)
        shortcut = L.BatchNormalization()(shortcut)

    return L.Add()([out, shortcut])


def MobileNetV3_A(input_shape):
    """Build the two-headed (policy + value) MobileNetV3-style model, variant A.

    A 3x3 stem convolution is followed by four bottleneck blocks (32, 32, 64
    and 128 channels; Squeeze-and-Excite enabled on the last two). Both heads
    branch off the output of the last block.

    Args:
        input_shape: shape of one input sample, without the batch dimension.

    Returns:
        A ``keras.Model`` named ``'MobileNet_go'`` with outputs ``'policy'``
        (softmax over the flattened board) and ``'value'`` (sigmoid scalar).
    """
    l2 = regularizers.l2
    inputs = L.Input(shape=input_shape)

    # Stem convolution.
    x = L.Conv2D(
        32,
        kernel_size=3,
        padding='same',
        kernel_regularizer=keras.regularizers.l2(1e-4),
        use_bias=False,
    )(inputs)
    x = L.BatchNormalization()(x)
    x = h_swish(x)

    # (channels, activation, use Squeeze-and-Excite) for each bottleneck.
    stages = (
        (32, tf.keras.activations.swish, False),
        (32, tf.keras.activations.swish, False),
        (64, h_swish, True),
        (128, h_swish, True),
    )
    for width, act, use_se in stages:
        x = bottleneck(x, kernel=3, stride=1, expansion=width, output_channels=width,
                       activation=act, se=use_se)

    # Policy head: two 3x3 conv blocks, then a 1x1 conv down to one plane.
    p = x
    for _ in range(2):
        p = L.Conv2D(4, 3, padding='same', use_bias=False, kernel_regularizer=l2(1e-4))(p)
        p = L.BatchNormalization()(p)
        p = L.Activation('swish')(p)
    p = L.Conv2D(filters=1, kernel_size=1, padding='same', use_bias=False, kernel_regularizer=l2(1e-4))(p)
    p = L.Flatten()(p)
    policy_out = L.Activation('softmax', name='policy')(p)

    # Value head: 3x3 conv, global pooling, then two dense layers.
    v = L.Conv2D(filters=16, kernel_size=3, strides=1, padding='same', use_bias=False, kernel_regularizer=l2(0.0001))(x)
    v = L.BatchNormalization()(v)
    v = L.Activation("swish")(v)
    v = L.GlobalAveragePooling2D()(v)
    v = L.Dense(50, activation='swish', kernel_regularizer=l2(0.0001))(v)
    value_out = L.Dense(1, activation='sigmoid', name='value', kernel_regularizer=l2(0.0001))(v)

    return keras.Model(inputs=inputs, outputs=[policy_out, value_out], name='MobileNet_go')

Computation

In [None]:
# Build variant A and print its layer summary.
model_3A = MobileNetV3_A(input_shape=(19, 19, 31))
model_3A.summary()

# Training hyper-parameters.
epochs, batch = 800, 64

# Cosine-annealed learning rate.
initial_learning_rate = 0.001  # could be raised (e.g. 0.01) if needed
total_steps = (N // batch) * epochs  # optimizer steps over the whole run: 800 * (10_000 // 64) = 124_800 when N = 10_000
alpha = 0.1  # floor of the schedule as a fraction of the initial LR: 0.001 * 0.1 = 0.0001 (10 %)

# --- Scheduler definition ---
lr_schedule = CosineDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=total_steps,
    alpha=alpha,
)

# One loss and one metric per head, both heads weighted equally.
model_3A.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
    loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
    loss_weights={'policy': 1.0, 'value': 1.0},
    metrics={
        'policy': ['categorical_accuracy'],
        'value': ['mse'],
    },
)


## Modèle MobileNetV3_B

In [None]:
def h_swish(x):
    """Hard-swish activation: ``x * relu6(x + 3) / 6``."""
    gate = tf.nn.relu6(x + 3)
    return x * gate / 6

def h_sigmoid(x):
    """Hard-sigmoid activation: ``relu6(x + 3) / 6``."""
    shifted = x + 3
    return tf.nn.relu6(shifted) / 6


def channel_attention(input_feature, ratio=8):
    """Channel attention: rescale each channel of ``input_feature``.

    Global average- and max-pooled descriptors go through the same two dense
    layers (channels // ``ratio`` units, then back to the channel count), are
    summed, and pass through a further 24-unit ReLU layer and a sigmoid layer
    whose output multiplies the input channel-wise.

    Args:
        input_feature: input feature map; its last dimension is the channel
            count.
        ratio: reduction factor of the shared bottleneck.

    Returns:
        ``input_feature`` multiplied by the channel attention weights.
    """
    n_channels = input_feature.shape[-1]
    l2 = keras.regularizers.l2

    # Average and max pooling over the spatial dimensions (H, W).
    descriptors = [
        L.GlobalAveragePooling2D()(input_feature),
        L.GlobalMaxPooling2D()(input_feature),
    ]
    # Reshape each descriptor to (batch, 1, 1, channels).
    descriptors = [L.Reshape((1, 1, n_channels))(d) for d in descriptors]

    # The two dense layers are shared by both descriptors.
    squeeze = L.Dense(
        n_channels // ratio,
        activation='relu',
        kernel_initializer='he_normal',
        use_bias=True,
        kernel_regularizer=l2(1e-4),
        bias_initializer='zeros',
    )
    expand = L.Dense(
        n_channels,
        kernel_initializer='he_normal',
        use_bias=True,
        kernel_regularizer=l2(1e-4),
        bias_initializer='zeros',
    )
    branches = [expand(squeeze(d)) for d in descriptors]

    weights = L.Add()(branches)
    weights = L.Dense(24, activation='relu', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(weights)
    weights = L.Dense(n_channels, activation='sigmoid', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(weights)

    return L.Multiply()([input_feature, weights])


def spatial_attention(input_feature):
    """Spatial attention: rescale each position of ``input_feature``.

    The mean and the max over the channel axis are concatenated and fed to a
    7x7 sigmoid convolution producing a single-channel map, which multiplies
    the input.

    Args:
        input_feature: input feature map; its last dimension is the channel
            axis.

    Returns:
        ``input_feature`` multiplied by the spatial attention map.
    """
    # Average and max pooling along the channel axis.
    channel_stats = [
        tf.reduce_mean(input_feature, axis=-1, keepdims=True),
        tf.reduce_max(input_feature, axis=-1, keepdims=True),
    ]
    stacked = L.Concatenate(axis=-1)(channel_stats)

    # Convolution generating the spatial attention map.
    attention_conv = L.Conv2D(
        filters=1,
        kernel_size=7,
        strides=1,
        padding='same',
        activation='sigmoid',
        kernel_initializer='he_normal',
        kernel_regularizer=keras.regularizers.l2(1e-4),
        use_bias=False,
    )
    attention_map = attention_conv(stacked)

    return L.Multiply()([input_feature, attention_map])


def cbam_block(input_feature, ratio=8):
    """CBAM block: channel attention followed by spatial attention.

    Args:
        input_feature: input feature map.
        ratio: reduction factor forwarded to ``channel_attention``.

    Returns:
        The feature map after both attention steps.
    """
    return spatial_attention(channel_attention(input_feature, ratio))



def mixconv2d(x, kernels=(3, 5)):
    """Mixed depthwise convolution: one kernel size per group of channels.

    The channels of ``x`` are split into ``len(kernels)`` contiguous groups,
    each group goes through its own stride-1 ``DepthwiseConv2D`` with the
    matching kernel size, and the results are concatenated back along the
    channel axis.

    Args:
        x: input feature map; its last dimension is the channel count.
        kernels: kernel size for each channel group.

    Returns:
        The concatenation of the per-group depthwise convolutions, with the
        same number of channels as ``x``.
    """
    input_channels = x.shape[-1]
    num_groups = len(kernels)
    channels_per_group = input_channels // num_groups

    group_outputs = []
    for i, k in enumerate(kernels):
        start = i * channels_per_group
        # The last group also takes the remainder channels so that none are
        # dropped when the channel count is not a multiple of the group count.
        stop = input_channels if i == num_groups - 1 else start + channels_per_group

        # Select one group of channels. The bounds are bound as default
        # arguments: Keras re-runs the lambda every time the model is called,
        # and a plain closure would then see the final loop values, making
        # every group slice the same channels.
        x_group = L.Lambda(
            lambda z, start=start, stop=stop: z[:, :, :, start:stop]
        )(x)

        # Depthwise convolution on this group.
        x_group = L.DepthwiseConv2D(
            kernel_size=k,
            strides=1,
            padding='same',
            use_bias=False,
            kernel_regularizer=regularizers.l2(1e-4)
        )(x_group)

        group_outputs.append(x_group)

    # Stitch the groups back together along the channel axis.
    return L.Concatenate(axis=-1)(group_outputs)


def bottleneck(x, kernel, stride, expansion, output_channels, activation, se=False):
    """MobileNetV3-style bottleneck with mixed depthwise conv and optional CBAM.

    Pipeline: 1x1 expansion -> mixed depthwise conv (kernels 3 and 5) ->
    optional CBAM attention -> 1x1 projection, each followed by batch
    normalization and ``activation``; a residual connection is added at the
    end.

    Args:
        x: input feature map.
        kernel: unused; the depthwise kernels are fixed to 3 and 5.
        stride: only applied to the shortcut projection.
        expansion: number of channels after the expansion convolution.
        output_channels: number of channels produced by the block.
        activation: callable applied after each batch normalization.
        se: whether to insert the attention step (CBAM here, in place of
            Squeeze-and-Excite).

    Returns:
        The sum of the transformed features and the shortcut.
    """
    # NOTE(review): the main path is never strided, so stride != 1 looks like
    # it would yield mismatched shapes at the Add -- confirm before using it.

    def pointwise(n_filters, **extra):
        # Bias-free, L2-regularized 1x1 convolution.
        return L.Conv2D(
            n_filters,
            kernel_size=1,
            padding='same',
            kernel_regularizer=keras.regularizers.l2(1e-4),
            use_bias=False,
            **extra,
        )

    shortcut = x  # kept for the residual connection

    # Expansion phase.
    out = pointwise(expansion)(x)
    out = L.BatchNormalization()(out)
    out = activation(out)

    # Depthwise phase.
    out = mixconv2d(out, kernels=[3, 5])
    out = L.BatchNormalization()(out)
    out = activation(out)

    # Optional attention step.
    if se:
        out = cbam_block(out, ratio=8)

    # Pointwise projection.
    out = pointwise(output_channels)(out)
    out = L.BatchNormalization()(out)
    out = activation(out)

    # The identity shortcut is only usable when the stride is 1 and the
    # channel counts already match; otherwise project it with a 1x1 conv.
    if stride != 1 or shortcut.shape[-1] != output_channels:
        shortcut = pointwise(output_channels, strides=stride)(shortcut)
        shortcut = L.BatchNormalization()(shortcut)

    return L.Add()([out, shortcut])



def MobileNetV3_B(input_shape):
    """Build the two-headed (policy + value) MobileNetV3-style model, variant B.

    A 3x3 stem convolution is followed by four bottleneck blocks (32, 32, 64
    and 128 channels; the attention step is enabled on the second and fourth).
    Both heads branch off the output of the last block.

    Args:
        input_shape: shape of one input sample, without the batch dimension.

    Returns:
        A ``keras.Model`` named ``'MobileNet_go'`` with outputs ``'policy'``
        (softmax over the flattened board) and ``'value'`` (sigmoid scalar).
    """
    l2 = regularizers.l2
    inputs = L.Input(shape=input_shape)

    # Stem convolution.
    x = L.Conv2D(
        32,
        kernel_size=3,
        padding='same',
        kernel_regularizer=keras.regularizers.l2(1e-4),
        use_bias=False,
    )(inputs)
    x = L.BatchNormalization()(x)
    x = h_swish(x)

    # (channels, activation, use attention) for each bottleneck.
    stages = (
        (32, tf.keras.activations.swish, False),
        (32, tf.keras.activations.swish, True),
        (64, h_swish, False),
        (128, h_swish, True),
    )
    for width, act, use_se in stages:
        x = bottleneck(x, kernel=3, stride=1, expansion=width, output_channels=width,
                       activation=act, se=use_se)

    # Policy head: two 3x3 conv blocks, then a 1x1 conv down to one plane.
    p = x
    for _ in range(2):
        p = L.Conv2D(4, 3, padding='same', use_bias=False, kernel_regularizer=l2(1e-4))(p)
        p = L.BatchNormalization()(p)
        p = L.Activation('swish')(p)
    p = L.Conv2D(filters=1, kernel_size=1, padding='same', use_bias=False, kernel_regularizer=l2(1e-4))(p)
    p = L.Flatten()(p)
    policy_out = L.Activation('softmax', name='policy')(p)

    # Value head: 3x3 conv, global pooling, then two dense layers.
    v = L.Conv2D(filters=16, kernel_size=3, strides=1, padding='same', use_bias=False, kernel_regularizer=l2(0.0001))(x)
    v = L.BatchNormalization()(v)
    v = L.Activation("swish")(v)
    v = L.GlobalAveragePooling2D()(v)
    v = L.Dense(50, activation='swish', kernel_regularizer=l2(0.0001))(v)
    value_out = L.Dense(1, activation='sigmoid', name='value', kernel_regularizer=l2(0.0001))(v)

    return keras.Model(inputs=inputs, outputs=[policy_out, value_out], name='MobileNet_go')

Computation

In [None]:
# Build variant B and print its layer summary.
model_3B = MobileNetV3_B(input_shape=(19, 19, 31))
model_3B.summary()

# Training hyper-parameters.
epochs, batch = 800, 64

# Cosine-annealed learning rate.
initial_learning_rate = 0.001  # could be raised (e.g. 0.01) if needed
total_steps = (N // batch) * epochs  # optimizer steps over the whole run: 800 * (10_000 // 64) = 124_800 when N = 10_000
alpha = 0.1  # floor of the schedule as a fraction of the initial LR: 0.001 * 0.1 = 0.0001 (10 %)

# --- Scheduler definition ---
lr_schedule = CosineDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=total_steps,
    alpha=alpha,
)

# One loss and one metric per head, both heads weighted equally.
model_3B.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=lr_schedule),
    loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
    loss_weights={'policy': 1.0, 'value': 1.0},
    metrics={
        'policy': ['categorical_accuracy'],
        'value': ['mse'],
    },
)

## Training
Ici, remplacer `model` par le nom du modèle que l'on souhaite entraîner (par exemple `model_3A` ou `model_3B`).

In [None]:
# CSV file receiving the per-epoch losses and validation metrics.
csv_filename = "save_metrics.csv"

# Metric histories; each list gets exactly one entry per epoch.
policy_losses, value_losses = [], []
val_policy_accuracy, val_value_mse = [], []

# Training loop.
for i in range(1, epochs + 1):
    print(f"Epoch {i}")

    # Presumably refills input_data / policy / value in place with a fresh
    # batch of N examples -- confirm against the golois API.
    golois.getBatch(input_data, policy, value, end, groups, i * N)

    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1, batch_size=batch)

    # When running locally, uncomment this part and comment out the fit above.
    #dataset = tf.data.Dataset.from_tensor_slices((input_data, {'policy': policy, 'value': value}))
    #dataset = dataset.shuffle(N).batch(batch).prefetch(tf.data.AUTOTUNE)
    #history = model.fit(dataset, epochs=1, callbacks=[early_stopping])

    epoch_metrics = history.history
    policy_losses.append(epoch_metrics['policy_loss'][0])
    value_losses.append(epoch_metrics['value_loss'][0])

    # Free memory every 5 epochs.
    if i % 5 == 0:
        gc.collect()

    if i % 20 == 0:
        # Validation pass every 20 epochs.
        golois.getValidation(input_data, policy, value, end)
        val = model.evaluate(input_data, [policy, value], verbose=0, batch_size=batch)

        # Store the validation results.
        val_policy_accuracy.append(val[3])  # categorical_accuracy
        val_value_mse.append(val[4])  # mse

        # Report the validation results.
        log_message = f"Validation at epoch {i}: {val}"
        print(log_message)
    else:
        # No validation this epoch: store 0 placeholders so the validation
        # lists stay aligned with the loss lists.
        val_policy_accuracy.append(0)  # categorical_accuracy
        val_value_mse.append(0)  # mse


# Save the per-epoch metrics to a CSV file once training is finished.
with open(csv_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Epoch", "Policy Loss", "Value Loss", "Validation Policy Accuracy", "Validation Value MSE"])
    # The training loop appends one entry per epoch to each of the four
    # metric lists, so they are walked together; epochs are numbered from 1
    # in the file.
    metric_rows = zip(policy_losses, value_losses, val_policy_accuracy, val_value_mse)
    writer.writerows(
        [epoch, *row] for epoch, row in enumerate(metric_rows, start=1)
    )

# Save the trained model.
model_filename = "model.h5"
model.save(model_filename)
print(f"Modèle sauvegardé sous {model_filename}")