In [None]:
# Add the parent directory to the import path (the notebook later imports
# `lib.encoding`, presumably from there).
import sys
sys.path.append('../')

# Both variables are set before TensorFlow is imported below; keep this order.
# CUDA_VISIBLE_DEVICES restricts the process to GPU index 0, and
# TF_CPP_MIN_LOG_LEVEL reduces TensorFlow's native log output.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Sanity check: print the names of the devices TensorFlow reports.
from tensorflow.python.client import device_lib
print([dev.name for dev in device_lib.list_local_devices()])

# Load dataset

In [None]:
import math
from glob import glob
import tensorflow as tf

In [None]:
# Raw training data. Each fixed-length record is 3 * 12 * 8 bytes; train_step
# decodes it as int64 values reshaped to (3, 12), i.e. three 12-integer boards.
dataset_pattern = "../data/dataset/2018_1600.bin"
files = glob(dataset_pattern)
if not files:
    # Without this guard an empty match yields an empty dataset and every
    # epoch would silently train on nothing.
    raise FileNotFoundError(f"No dataset files matched {dataset_pattern!r}")

record_size = 3 * 12 * 8
batch_size = 4096
# Count whole records only; integer division keeps the count an int.
dataset_size = sum(os.path.getsize(f) for f in files) // record_size
batches_per_epoch = math.ceil(dataset_size / batch_size)

dataset = tf.data.FixedLengthRecordDataset(filenames=files, record_bytes=record_size)
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
# NOTE(review): elements are still raw byte strings here (decoding happens in
# train_step) - confirm that copying them to the GPU is actually beneficial.
dataset = dataset.apply(tf.data.experimental.copy_to_device('/gpu:0'))
dataset = dataset.prefetch(tf.data.AUTOTUNE)

#import tensorflow_datasets as tfds
#tfds.benchmark(dataset, batch_size=batch_size)

dataset_size

# Train

In [None]:
from tqdm import tqdm
from time import time
import keras
from keras.models import Model
from keras.layers import Dense
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, TensorBoard, CallbackList
from lib.encoding import encode_board, decode_board

In [None]:
def custom_loss(y_pred):
    """
    Compute loss as defined in https://erikbern.com/2014/11/29/deep-learning-for-chess.html
    // sum(p,q,r)logS(f(q)−f(r))+K*log(f(p)+f(q))+K*log(−f(q)−f(p))

    Args:
        y_pred: tensor whose columns 0, 1 and 2 hold the model scores for
            p, q and r of each sample.

    Returns:
        A tuple ``(loss, obj)``: ``loss`` is the sum of the three negative
        mean log-sigmoid terms, and ``obj`` is ``loss`` plus an L2 penalty
        over ``chess_model.trainable_variables`` (the value to differentiate).

    NOTE(review): the formula above is written with f(q) - f(r) while the code
    uses r - q; confirm which sign convention is intended.
    """
    p = y_pred[:, 0]
    q = y_pred[:, 1]
    r = y_pred[:, 2]
    # K scales the sigmoid argument here, whereas the formula writes it as a
    # weight on the log terms; the two agree for K = 1.
    K = 1.0

    rq_diff = r - q
    pq_diff = K * (p + q)

    # log_sigmoid is the numerically stable form of log(sigmoid(x)): the naive
    # composition underflows to log(0) = -inf for large negative arguments.
    a = -tf.math.reduce_mean(tf.math.log_sigmoid(rq_diff))
    b = -tf.math.reduce_mean(tf.math.log_sigmoid(pq_diff))
    c = -tf.math.reduce_mean(tf.math.log_sigmoid(-pq_diff))

    reg = 0.0  # L2 penalty: 0.01 * mean squared value of each trainable variable
    for x in chess_model.trainable_variables:
        reg += 0.01 * tf.math.reduce_mean(tf.math.square(x))

    loss = a + b + c
    obj = loss + reg

    return loss, obj

def make_chess_model():
    """Build the scoring network.

    The input is a vector of 12 int64 values that `decode_board` expands
    (to 768 floats, per the original inline comment). It is followed by
    three 256-unit ReLU layers and a single linear output unit.

    Returns:
        The un-compiled Keras `Model`.
    """
    board_input = tf.keras.Input(shape=(12,), dtype=tf.int64)
    hidden = decode_board(board_input)  # convert 12 ints to 768 floats
    for _ in range(3):
        hidden = Dense(256, activation="relu")(hidden)
    score = Dense(1)(hidden)
    return Model(inputs=board_input, outputs=score)

# Build the module-level model instance (custom_loss and train_step refer to
# `chess_model` by name) and print its layer summary.
chess_model = make_chess_model()
chess_model.summary()

In [None]:
# Timestamp that namespaces this run's checkpoint and log directories.
ts = int(time())
epochs = 99999

# Adam with a staircase exponential decay: the learning rate is multiplied by
# 0.98 once every 100 * 500 optimizer steps.
#optimizer = SGD(learning_rate=0.03, nesterov=True, momentum=0.9, clipnorm=1)
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=100 * 500,
    decay_rate=0.98,
    staircase=True,
)
optimizer = Adam(learning_rate=lr_schedule)

# Running means of the two values returned by custom_loss.
loss_tracker = keras.metrics.Mean(name="loss")
obj_tracker = keras.metrics.Mean(name="obj")

# Callbacks are driven manually by the custom training loop.
checkpoint_cb = ModelCheckpoint(
    f"checkpoints/{ts}" + "/model-{epoch:04d}-{loss:.3f}.keras",
    monitor="loss",
    save_best_only=True,
)
tensorboard_cb = TensorBoard(log_dir=f"./logs/{ts}", write_graph=False)
callbacks = CallbackList([checkpoint_cb, tensorboard_cb], model=chess_model)

@tf.function
def train_step(batch):
    """Run one optimizer step on a batch of raw records and update the metrics.

    Args:
        batch: raw record bytes as yielded by the dataset; decoded here as
            int64 and viewed as (N, 3, 12).
    """
    triplets = tf.reshape(tf.io.decode_raw(batch, tf.int64), (-1, 3, 12))
    # Flatten so that every board is scored as its own row.
    boards = tf.reshape(triplets, (-1, 12))

    # Record the forward pass so the objective can be differentiated.
    with tf.GradientTape() as tape:
        scores = chess_model(boards, training=True)
        # Regroup so each row again holds the three scores of one record.
        logits = tf.reshape(scores, (-1, 3))
        loss_value, obj_value = custom_loss(logits)

    # Differentiate the regularised objective (not the bare loss) and apply
    # one update to the model's trainable weights.
    weights = chess_model.trainable_weights
    grads = tape.gradient(obj_value, weights)
    optimizer.apply_gradients(zip(grads, weights))

    # Accumulate the running means reported by the training loop.
    loss_tracker.update_state(loss_value)
    obj_tracker.update_state(obj_value)

# Custom training loop: drives train_step over the dataset and invokes the
# Keras callbacks manually at train/epoch boundaries.
import chess  # hoisted out of the epoch loop; it used to be re-imported every epoch

# Two fixed probe positions whose predicted scores are logged every epoch as
# "good" and "bad". They never change, so they are built and encoded once.
good_board = chess.Board("2kr3r/1pp1pp1p/1p6/q4bP1/2B5/4BP2/Pb1NQK1P/R2R4 w - - 1 18")
bad_board = chess.Board("2kr3r/1pp1pp1p/1p6/q4bP1/2B5/4BP2/Pb1NQK1P/R1R5 w - - 1 18")
probe_boards = tf.concat([encode_board(good_board), encode_board(bad_board)], axis=0)

callbacks.on_train_begin()
for epoch in range(epochs):
    # reset_state() is the current Metric API; reset_states() is deprecated.
    loss_tracker.reset_state()
    obj_tracker.reset_state()
    callbacks.on_epoch_begin(epoch)

    batch_i = 0
    batch_i_last = 0
    with tqdm(total=batches_per_epoch, bar_format=f"Epoch {epoch+1}/{epochs}" + " {l_bar}{bar:10}{r_bar}{bar:-10b}") as pbar:
        for batch in dataset:
            train_step(batch)

            batch_i += 1
            # Refresh the bar only every 10 batches (and on the last expected
            # batch) to keep progress reporting cheap.
            if batch_i % 10 == 0 or batch_i == batches_per_epoch:
                pbar.set_postfix_str(f"loss={loss_tracker.result():.4f} obj={obj_tracker.result():.4f} lr={optimizer.learning_rate.numpy():.4f}")
                pbar.update(batch_i - batch_i_last)
                batch_i_last = batch_i

    # Despite the name, this is the SUM over weight tensors of each tensor's
    # mean absolute value.
    mean_tw = sum(tf.reduce_mean(tf.abs(tw)).numpy() for tw in chess_model.trainable_weights)

    pred = chess_model.predict(probe_boards, verbose=0)

    callbacks.on_epoch_end(epoch, logs={
        "loss": loss_tracker.result(),
        "obj": obj_tracker.result(),
        "lr": optimizer.learning_rate.numpy(),
        "good": pred[0][0],
        "bad": pred[1][0],
        "mean_tw": mean_tw
    })
callbacks.on_train_end()
