In [1]:
import sys
sys.path.append('../')

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

from tensorflow.python.client import device_lib
print([dev.name for dev in device_lib.list_local_devices()])

['/device:CPU:0', '/device:GPU:0']


# Load dataset

In [2]:
import math
from glob import glob
import tensorflow as tf

In [3]:
files = glob("../data/dataset/*_1600.bin")
record_size = 3 * 12 * 8
batch_size = 4096
dataset_size = sum([os.path.getsize(f) for f in files]) / record_size
batches_per_epoch = math.ceil(dataset_size / batch_size)

dataset = tf.data.FixedLengthRecordDataset(filenames=files, record_bytes=record_size)
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
dataset = dataset.apply(tf.data.experimental.copy_to_device('/gpu:0'))
dataset = dataset.prefetch(tf.data.AUTOTUNE)

#import tensorflow_datasets as tfds
#tfds.benchmark(dataset, batch_size=batch_size)

dataset_size

39098455.0

# Train

In [4]:
from tqdm import tqdm
from time import time
import keras
from keras.models import Model
from keras.layers import Dense
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, TensorBoard, CallbackList, ReduceLROnPlateau
from lib.encoding import encode_board, decode_board

In [5]:
def custom_loss(y_pred):
    """
    Compute loss as defined in https://erikbern.com/2014/11/29/deep-learning-for-chess.html
    // sum(p,q,r)logS(f(q)−f(r))+K*log(f(p)+f(q))+K*log(−f(q)−f(p))
    """
    p = y_pred[:,0]
    q = y_pred[:,1]
    r = y_pred[:,2]

    a = -tf.math.reduce_mean(tf.math.log(tf.math.sigmoid(r - q)))
    b = tf.math.reduce_mean(tf.math.square(p + q))

    reg = 0.0 # L2
    for x in chess_model.trainable_variables:
        reg += 0.01 * tf.math.reduce_mean(tf.math.square(x))

    loss = a + b
    obj = loss + reg

    return loss, obj

def make_chess_model():
    inp = tf.keras.Input(shape=(12,), dtype=tf.int64)
    x = decode_board(inp) # convert 12 ints to 768 floats
    x = Dense(256, activation="relu")(x)
    x = Dense(256, activation="relu")(x)
    x = Dense(256, activation="relu")(x)
    x = Dense(1, activation="tanh")(x)
    return Model(inp, x)

chess_model = make_chess_model()
chess_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 12)]              0         
                                                                 
 tf.expand_dims (TFOpLambda  (None, 12, 1)             0         
 )                                                               
                                                                 
 tf.bitwise.bitwise_and (TF  (None, 12, 64)            0         
 OpLambda)                                                       
                                                                 
 tf.math.not_equal (TFOpLam  (None, 12, 64)            0         
 bda)                                                            
                                                                 
 tf.cast (TFOpLambda)        (None, 12, 64)            0         
                                                             

In [6]:
#name = "1706738403-1024-rq+mse"
#chess_model = keras.models.load_model(f"/mnt/c/Users/mlomb/Desktop/Tesis/cs-master-thesis/notebooks/checkpoints/1706738403-1024-rq+mse/model-0035-0.448.keras")

In [7]:
name = f"{int(time())}-256--nsrq+mse-tanh"
epochs = 99999
optimizer = Adam(learning_rate=0.01)
loss_tracker = keras.metrics.Mean(name="loss")
obj_tracker = keras.metrics.Mean(name="obj")
callbacks = CallbackList([
    ModelCheckpoint(f"checkpoints/{name}" + "/model-{epoch:04d}-{loss:.3f}.keras", monitor="loss", save_best_only=True),
    TensorBoard(log_dir=f"./logs/{name}", write_graph=False)
], model=chess_model)

@tf.function
def train_step(batch):
    batch = tf.reshape(tf.io.decode_raw(batch, tf.int64), (-1, 3, 12))

    # Open a GradientTape to record the operations run
    # during the forward pass, which enables auto-differentiation.
    with tf.GradientTape() as tape:
        # Run the forward pass of the layer.
        # The operations that the layer applies
        # to its inputs are going to be recorded
        # on the GradientTape.
        logits = tf.reshape(chess_model(tf.reshape(batch, (-1, 12)), training=True), (-1, 3))  # Logits for this minibatch

        # Compute the loss value for this minibatch.
        loss_value, obj_value = custom_loss(logits)

    # Use the gradient tape to automatically retrieve
    # the gradients of the trainable variables with respect to the loss.
    grads = tape.gradient(obj_value, chess_model.trainable_weights)

    # Run one step of gradient descent by updating
    # the value of the variables to minimize the loss.
    optimizer.apply_gradients(zip(grads, chess_model.trainable_weights))

    # Update metrics
    loss_tracker.update_state(loss_value)
    obj_tracker.update_state(obj_value)

prev_loss = 0
epochs_without_improvement = 0

callbacks.on_train_begin()
for epoch in range(epochs):
    loss_tracker.reset_states()
    obj_tracker.reset_states()
    callbacks.on_epoch_begin(epoch)

    if math.isnan(loss_tracker.result()):
        print("Loss is NaN, exiting")
        break

    batch_i = 0
    batch_i_last = 0
    with tqdm(total=batches_per_epoch, bar_format=f"Epoch {epoch+1}/{epochs}" + " {l_bar}{bar:10}{r_bar}{bar:-10b}") as pbar:
        for batch in dataset:
            train_step(batch)

            batch_i += 1
            if batch_i % 10 == 0 or batch_i == batches_per_epoch:
                pbar.set_postfix_str(f"loss={loss_tracker.result():.4f} obj={obj_tracker.result():.4f} lr={optimizer.learning_rate.numpy():.4f}")
                pbar.update(batch_i - batch_i_last)
                batch_i_last = batch_i

    mean_tw = sum([tf.reduce_mean(tf.abs(tw)).numpy() for tw in chess_model.trainable_weights])

    epoch_loss = loss_tracker.result()

    logs = {
        "loss": epoch_loss,
        "obj": obj_tracker.result(),
        "lr": optimizer.learning_rate.numpy(),
        "mean_tw": mean_tw
    }

    # NOTE: GOOD/BAD based on the POV of WHO JUST PLAYED
    samples = [
        # 2kr3r/1pp1pp1p/1p6/q4bP1/2B5/4BP2/Pb1NQK1P/R6R w - - 0 18
        #("2r4r/pB1nqk1p/4bp2/2b5/Q4Bp1/1P6/1PP1PP1P/2KR3R w - - 1 18", # good
        #"3r3r/pB1nqk1p/4bp2/2b5/Q4Bp1/1P6/1PP1PP1P/2KR3R w - - 1 18"), # bad

        # mega blunder
        # 2n1kb1r/r2n1ppp/1R2p3/p2pP1N1/Q2P3q/1N2B3/P4PPP/1R4K1 w k - 1 18
        ("1r4k1/p4ppp/1n2b3/q2p3Q/P2Pp1n1/4r3/R2N1PPP/2N1KB1R w K - 0 18", # good
        "1r4k1/p4ppp/1n2b3/q2p3Q/P2Pp1n1/4P3/R2N1PPP/1rN1KB1R w K - 2 18"), # bad

        # inaccuracy
        # r2qkb1r/1pp1nppp/p1n1p3/1B1pPb2/3P4/5N2/PPP2PPP/RNBQ1RK1 w kq - 0 7
        ("rnbq1rk1/ppp2ppp/5n2/3p4/3PpB2/P1b1P3/1PP1NPPP/R2QKB1R w KQ - 0 7", # good
        "rnbq1rk1/ppp1bppp/5n2/3p4/3PpB2/P1N1P3/1PP1NPPP/R2QKB1R w KQ - 1 7") # bad
    ]

    import chess
    for (i, (good_fen, bad_fen)) in enumerate(samples):
        good_board = chess.Board(good_fen)
        bad_board = chess.Board(bad_fen)
        pred = chess_model.predict(tf.concat([encode_board(good_board), encode_board(bad_board)], axis=0), verbose=0)
        logs[f"diff{i}"] = pred[0][0] - pred[1][0] # positive diff = training is good

    callbacks.on_epoch_end(epoch, logs)

    #lr_schedule = ReduceLROnPlateau(
    #    monitor='loss',
    #    factor=0.7,
    #    patience=5,
    #    min_delta=0.001,
    #)
    if prev_loss - epoch_loss >= 0.0001:
        # improvement!
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
    prev_loss = epoch_loss
    if epochs_without_improvement >= 6:
        keras.backend.set_value(optimizer.learning_rate, optimizer.learning_rate.numpy() * 0.6)
        epochs_without_improvement = 0
callbacks.on_train_end()


I0000 00:00:1706878541.733489   28735 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
Epoch 1/99999   8%|▊         | 790/9546 [00:20<03:47, 38.54it/s, loss=nan obj=nan lr=0.0100]        


KeyboardInterrupt: 

In [None]:
import chess
b = chess.Board("r2qkb1r/1pp1nppp/p1n1p3/3pPb2/3P4/5N2/PPP1BPPP/RNBQ1RK1 b kq - 1 7").mirror()
print(b.fen())
b

In [None]:
chess_model = keras.models.load_model("/mnt/c/Users/mlomb/Desktop/Tesis/cs-master-thesis/notebooks/checkpoints/1706848193-256-rq+mse-tanh/model-0140-0.535.keras", compile=False)

# NOTE: GOOD/BAD based on the POV of WHO JUST PLAYED
samples = [
    # 2kr3r/1pp1pp1p/1p6/q4bP1/2B5/4BP2/Pb1NQK1P/R6R w - - 0 18
    #("2r4r/pB1nqk1p/4bp2/2b5/Q4Bp1/1P6/1PP1PP1P/2KR3R w - - 1 18", # good
    #"3r3r/pB1nqk1p/4bp2/2b5/Q4Bp1/1P6/1PP1PP1P/2KR3R w - - 1 18"), # bad

    # mega blunder
    # 2n1kb1r/r2n1ppp/1R2p3/p2pP1N1/Q2P3q/1N2B3/P4PPP/1R4K1 w k - 1 18
    ("1r4k1/p4ppp/1n2b3/q2p3Q/P2Pp1n1/4r3/R2N1PPP/2N1KB1R w K - 0 18", # good
    "1r4k1/p4ppp/1n2b3/q2p3Q/P2Pp1n1/4P3/R2N1PPP/1rN1KB1R w K - 2 18"), # bad

    # inaccuracy
    # r2qkb1r/1pp1nppp/p1n1p3/1B1pPb2/3P4/5N2/PPP2PPP/RNBQ1RK1 w kq - 0 7
    ("rnbq1rk1/ppp2ppp/5n2/3p4/3PpB2/P1b1P3/1PP1NPPP/R2QKB1R w KQ - 0 7", # good
    "rnbq1rk1/ppp1bppp/5n2/3p4/3PpB2/P1N1P3/1PP1NPPP/R2QKB1R w KQ - 1 7") # bad
]

import chess
for (i, (good_fen, bad_fen)) in enumerate(samples):
    good_board = chess.Board(good_fen)
    bad_board = chess.Board(bad_fen)
    pred = chess_model.predict(tf.concat([encode_board(good_board), encode_board(bad_board)], axis=0), verbose=0)
    
    print(pred)
    print(pred[0][0] - pred[1][0]) # good - bad should be +
