IMPORTING LIBRARIES

In [1]:
import numpy as np
import tensorflow as tf
import chess
import chess.pgn
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import gensim
from gensim.models import Word2Vec
sns.set()

FEN UTILITY FUNCTIONS

In [3]:
def embed_fen(fen):
    board = chess.Board(fen)
    embed = [0]*768
    piece_dict = {'K': 0, 'Q': 1, 'R': 2, 'B': 3, 'N': 4, 'P': 5, 'k': 6, 'q': 7, 'r': 8, 'b': 9, 'n': 10, 'p': 11}
    for square, piece in board.piece_map().items():
        embed[square*12 + piece_dict[str(piece)]] = 1
    #embed[-1] = int(str(stock_eval(board)))/100 #to include stockfish evaluation
    return embed

In [4]:
def decode_embed(embed):
    piece_dict = {id:piece for id, piece in zip(np.arange(12), ['K', 'Q', 'R', 'B', 'N', 'P', 'k', 'q', 'r', 'b', 'n', 'p'])}
    piece_loc = np.arange(len(embed))[embed == 1]
    board = chess.Board(None)
    for loc in piece_loc:
        sq = loc//12
        p = piece_dict[loc % 12]
        board.set_piece_at(sq, chess.Piece.from_symbol(p))
    return board

POSITIONAL ENCODING ON TOKENIZED INPUT

In [5]:
def pos_enc_matrix(L, d, n = 10000):
    """Create positional encoding matrix

    Args:
        L: Input dimension (length)
        d: Output dimension (depth), even only
        n: Constant for the sinusoidal functions

    Returns:
        numpy matrix of floats of dimension L-by-d. At element (k,2i) the value
        is sin(k/n^(2i/d)) while at element (k,2i+1) the value is cos(k/n^(2i/d))
    """
    assert d % 2 == 0, "Output dimension needs to be an even integer"
    d2 = d//2
    P = np.zeros((L, d))
    k = np.arange(L).reshape(-1, 1)     # L-column vector
    i = np.arange(d2).reshape(1, -1)    # d-row vector
    denom = np.power(n, -i/d2)          # n**(-2*i/d)
    args = k * denom                    # (L,d) matrix
    P[:, ::2] = np.sin(args)
    P[:, 1::2] = np.cos(args)
    return P

In [6]:
#position embedding class for decoder (to be used as a layer)
#@tf.keras.utils.register_keras_serializable()
class PositionalEmbeddingDecoder(tf.keras.layers.Layer):
    """Positional embedding layer. Assume tokenized input, transform into
    embedding and returns positional-encoded output."""
    def __init__(self, seq_len, vocab_size, embed_dim, **kwargs):
        """
        Args:
            seq_len: Input sequence length
            vocab_size: Input vocab size, for setting up embedding matrix
            embed_dim: Embedding vector size, for setting up embedding matrix
        """
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim     # d_model in paper
        # token embedding layer: Convert integer token to D-dim float vector
        self.token_embeddings = tf.keras.layers.Embedding(
            input_dim = vocab_size, output_dim = embed_dim, mask_zero = True
        )
        # positional embedding layer: a matrix of hard-coded sine values
        matrix = pos_enc_matrix(seq_len, embed_dim)
        self.position_embeddings = tf.constant(matrix, dtype = "float32")

    def call(self, inputs):
        """Input tokens convert into embedding vectors then superimposed
        with position vectors"""
        embedded_tokens = self.token_embeddings(inputs)
        return embedded_tokens + self.position_embeddings

    # this layer is using an Embedding layer, which can take a mask
    # passing_mask_tensors_directly_to_layers
    def compute_mask(self, *args, **kwargs):
        return self.token_embeddings.compute_mask(*args, **kwargs)

    def get_config(self):
        # to make save and load a model using custom layer possible
        config = super().get_config()
        config.update({
            "seq_len": self.seq_len,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

In [7]:
# #position embedding class for encoder (to be used as a layer)
# #input is padded (to sequence length) and one hot encoded
# #@tf.keras.utils.register_keras_serializable()
# class PositionalEmbeddingEncoder(tf.keras.layers.Layer):
#     """Positional embedding layer. Assume tokenized input, transform into
#     embedding and returns positional-encoded output."""
#     def __init__(self, seq_len, embed_dim, embed_model, **kwargs):
#         """
#         Args:
#             sequence_length: Input sequence length
#             vocab_size: Input vocab size, for setting up embedding matrix
#             embed_dim: Embedding vector size, for setting up embedding matrix
#         """
#         super().__init__(**kwargs)
#         self.seq_len = seq_len
#         self.embed_dim = embed_dim     # d_model in paper
#         # token embedding layer: Convert integer token to D-dim float vector
#         self.embed_model = embed_model
#         #self.token_embeddings = tf.keras.layers.TimeDistributed(embed_model)
#         # positional embedding layer: a matrix of hard-coded sine values
#         matrix = pos_enc_matrix(seq_len, embed_dim)
#         self.position_embeddings = tf.constant(matrix, dtype = "float32")

#     def call(self, inputs):
#         """Input tokens convert into embedding vectors then superimposed
#         with position vectors"""
#         #embedded_tokens = self.token_embeddings(inputs) #keeping the zero rows
#         #embedded_tokens = tf.stack([self.embed_model(inputs[i]) for i in range(self.seq_len)])
#         embedded_tokens = tf.stack([self.embed_model(inputs[:, i, :]) for i in range(self.seq_len)], axis = 1)
#         return embedded_tokens + self.position_embeddings

#     # this layer is using an Embedding layer, which can take a mask
#     # passing_mask_tensors_directly_to_layers
#     def compute_mask(self, inputs, *args, **kwargs):
#         return tf.reduce_all(inputs == 0, axis = 2)

#     def get_config(self):
#         # to make save and load a model using custom layer possible
#         config = super().get_config()
#         config.update({
#             "seq_len": self.seq_len,
#             "embed_model": self.embed_model.to_json(),
#             "embed_dim": self.embed_dim,
#         })
#         return config

#     @classmethod
#     def from_config(cls, config):
#         embed_model = tf.keras.models.model_from_json(config.pop('embed_model'))
#         return cls(embed_model = embed_model, **config)

In [8]:
#position embedding class for encoder (to be used as a layer)
#input is padded (to sequence length) and one hot encoded
#@tf.keras.utils.register_keras_serializable()
class PositionalEmbeddingEncoder(tf.keras.layers.Layer):
    """Positional embedding layer. Assume tokenized input, transform into
    embedding and returns positional-encoded output."""
    def __init__(self, seq_len, embed_dim, **kwargs):
        """
        Args:
            sequence_length: Input sequence length
            vocab_size: Input vocab size, for setting up embedding matrix
            embed_dim: Embedding vector size, for setting up embedding matrix
        """
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.embed_dim = embed_dim     # d_model in paper
        # token embedding layer: Convert integer token to D-dim float vector
        # self.embed_model = embed_model
        #self.token_embeddings = tf.keras.layers.TimeDistributed(embed_model)
        # positional embedding layer: a matrix of hard-coded sine values
        matrix = pos_enc_matrix(seq_len, embed_dim)
        self.position_embeddings = tf.constant(matrix, dtype = "float32")

    def call(self, inputs):
        """Input tokens convert into embedding vectors then superimposed
        with position vectors"""
        #embedded_tokens = self.token_embeddings(inputs) #keeping the zero rows
        #embedded_tokens = tf.stack([self.embed_model(inputs[i]) for i in range(self.seq_len)])
        #embedded_tokens = tf.stack([self.embed_model(inputs[:, i, :]) for i in range(self.seq_len)], axis = 1)
        #return embedded_tokens + self.position_embeddings
        return inputs + self.position_embeddings

    # this layer is using an Embedding layer, which can take a mask
    # passing_mask_tensors_directly_to_layers
    def compute_mask(self, inputs, *args, **kwargs):
        return tf.reduce_all(inputs == 0, axis = 2)

    def get_config(self):
        # to make save and load a model using custom layer possible
        config = super().get_config()
        config.update({
            "seq_len": self.seq_len,
            "embed_dim": self.embed_dim,
        })
        return config

ATTENTION FUNCTIONS

In [9]:
def self_attention(input_shape, prefix="att", mask=False, **kwargs):
    """Self-attention layers at transformer encoder and decoder. Assumes its
    input is the output from positional encoding layer.

    Args:
        prefix (str): The prefix added to the layer names
        masked (bool): whether to use causal mask. Should be False on encoder and
                       True on decoder. When True, a mask will be applied such that
                       each location only has access to the locations before it.
    """
    # create layers
    inputs = tf.keras.layers.Input(shape=input_shape, dtype='float32',
                                   name=f"{prefix}_in1")
    attention = tf.keras.layers.MultiHeadAttention(name=f"{prefix}_attn1", **kwargs)
    norm = tf.keras.layers.LayerNormalization(name=f"{prefix}_norm1")
    add = tf.keras.layers.Add(name=f"{prefix}_add1")
    # functional API to connect input to output
    attout = attention(query=inputs, value=inputs, key=inputs,
                       use_causal_mask=mask)
    outputs = norm(add([inputs, attout]))
    # create model and return
    model = tf.keras.Model(inputs=inputs, outputs=outputs, name=f"{prefix}_att")
    return model

In [10]:
def cross_attention(input_shape, context_shape, prefix="att", **kwargs):
    """Cross-attention layers at transformer decoder. Assumes its
    input is the output from positional encoding layer at decoder
    and context is the final output from encoder.

    Args:
        prefix (str): The prefix added to the layer names
    """
    # create layers
    context = tf.keras.layers.Input(shape=context_shape, dtype='float32',
                                    name=f"{prefix}_ctx2")
    inputs = tf.keras.layers.Input(shape=input_shape, dtype='float32',
                                   name=f"{prefix}_in2")
    attention = tf.keras.layers.MultiHeadAttention(name=f"{prefix}_attn2", **kwargs)
    norm = tf.keras.layers.LayerNormalization(name=f"{prefix}_norm2")
    add = tf.keras.layers.Add(name=f"{prefix}_add2")
    # functional API to connect input to output
    attout = attention(query=inputs, value=context, key=context)
    outputs = norm(add([attout, inputs]))
    # create model and return
    model = tf.keras.Model(inputs=[(context, inputs)], outputs=outputs,
                           name=f"{prefix}_cross")
    return model

In [11]:
def feed_forward(input_shape, model_dim, ff_dim, dropout=0.1, prefix="ff"):
    """Feed-forward layers at transformer encoder and decoder. Assumes its
    input is the output from an attention layer with add & norm, the output
    is the output of one encoder or decoder block

    Args:
        model_dim (int): Output dimension of the feed-forward layer, which
                         is also the output dimension of the encoder/decoder
                         block
        ff_dim (int): Internal dimension of the feed-forward layer
        dropout (float): Dropout rate
        prefix (str): The prefix added to the layer names
    """
    # create layers
    inputs = tf.keras.layers.Input(shape=input_shape, dtype='float32',
                                   name=f"{prefix}_in3")
    dense1 = tf.keras.layers.Dense(ff_dim, name=f"{prefix}_ff1", activation="relu")
    dense2 = tf.keras.layers.Dense(model_dim, name=f"{prefix}_ff2")
    drop = tf.keras.layers.Dropout(dropout, name=f"{prefix}_drop")
    add = tf.keras.layers.Add(name=f"{prefix}_add3")
    # functional API to connect input to output
    ffout = drop(dense2(dense1(inputs)))
    norm = tf.keras.layers.LayerNormalization(name=f"{prefix}_norm3")
    outputs = norm(add([inputs, ffout]))
    # create model and return
    model = tf.keras.Model(inputs=inputs, outputs=outputs, name=f"{prefix}_ff")
    return model

In [12]:
def encoder(input_shape, key_dim, ff_dim, dropout=0.1, prefix="enc", **kwargs):
    """One encoder unit. The input and output are in the same shape so we can
    daisy chain multiple encoder units into one larger encoder"""
    model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=input_shape, dtype='float32', name=f"{prefix}_in0"),
        self_attention(input_shape, prefix=prefix, key_dim=key_dim, mask=False, **kwargs),
        feed_forward(input_shape, key_dim, ff_dim, dropout, prefix),
    ], name=prefix)
    return model

In [13]:
def decoder(input_shape, key_dim, ff_dim, dropout=0.1, prefix="dec", **kwargs):
    """One decoder unit. The input and output are in the same shape so we can
    daisy chain multiple decoder units into one larger decoder. The context
    vector is also assumed to be the same shape for convenience"""
    inputs = tf.keras.layers.Input(shape=input_shape, dtype='float32',
                                   name=f"{prefix}_in0")
    context = tf.keras.layers.Input(shape=input_shape, dtype='float32',
                                    name=f"{prefix}_ctx0")
    attmodel = self_attention(input_shape, key_dim=key_dim, mask=True,
                              prefix=prefix, **kwargs)
    crossmodel = cross_attention(input_shape, input_shape, key_dim=key_dim,
                                 prefix=prefix, **kwargs)
    ffmodel = feed_forward(input_shape, key_dim, ff_dim, dropout, prefix)
    x = attmodel(inputs)
    x = crossmodel([(context, x)])
    output = ffmodel(x)
    model = tf.keras.Model(inputs=[(inputs, context)], outputs=output, name=prefix)
    return model

BUILDING THE TRANSFORMER MODEL

In [30]:
def transformer(num_layers, num_heads, seq_len_enc, seq_len_dec, key_dim, ff_dim,
                vocab_size_tgt, embed_model, dropout=0.1, name="transformer"):
    embed_shape_enc = (seq_len_enc, key_dim)  # output shape of the positional embedding layer (encoder)
    embed_shape_dec = (seq_len_dec, key_dim)
    # set up layers
    input_enc = tf.keras.layers.Input(shape=(seq_len_enc, key_dim), dtype="float32",
                                      name="encoder_inputs")
    input_dec = tf.keras.layers.Input(shape=(seq_len_dec,), dtype="int32",
                                      name="decoder_inputs")
    #embed_enc = PositionalEmbeddingEncoder(seq_len_enc, vocab_size_src, key_dim, name="embed_enc")
    embed_enc = PositionalEmbeddingEncoder(seq_len_enc, key_dim, name = 'embed_enc')
    embed_dec = PositionalEmbeddingDecoder(seq_len_dec, vocab_size_tgt, key_dim, name="embed_dec")
    encoders = [encoder(input_shape=embed_shape_enc, key_dim=key_dim,
                        ff_dim=ff_dim, dropout=dropout, prefix=f"enc{i}",
                        num_heads=num_heads)
                for i in range(num_layers)]
    decoders = [decoder(input_shape=embed_shape_dec, key_dim=key_dim,
                        ff_dim=ff_dim, dropout=dropout, prefix=f"dec{i}",
                        num_heads=num_heads)
                for i in range(num_layers)]
    final = tf.keras.layers.Dense(vocab_size_tgt, name="linear")
    # build output
    x1 = embed_enc(input_enc)
    x2 = embed_dec(input_dec)
    for layer in encoders:
        x1 = layer(x1)
    for layer in decoders:
        x2 = layer([x2, x1])
    output = final(x2)
    # XXX keep this try-except block
    try:
        del output._keras_mask
    except AttributeError:
        pass
    model = tf.keras.Model(inputs=[input_enc, input_dec], outputs=output, name=name)
    return model

CUSTOM LEARNING RATE SCHEDULING

In [15]:
#@tf.keras.utils.register_keras_serializable()
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    "Custom learning rate for Adam optimizer"
    def __init__(self, key_dim, warmup_steps = 4000):
        super().__init__()
        self.key_dim = key_dim
        self.warmup_steps = warmup_steps
        self.d = tf.cast(self.key_dim, tf.float32)

    def __call__(self, step):
        step = tf.cast(step, dtype = tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d) * tf.math.minimum(arg1, arg2)

    def get_config(self):
        # to make save and load a model using custom layer possible0
        config = {
            "key_dim": self.key_dim,
            "warmup_steps": self.warmup_steps,
        }
        return config

LOSS FUNCTION AND EVALUATION METRIC

In [16]:
def masked_loss(label, pred):
    mask = label != 0
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True, reduction = 'none')
    loss = loss_object(label, pred)
    mask = tf.cast(mask, dtype=loss.dtype)
    loss *= mask
    loss = tf.reduce_sum(loss)/tf.reduce_sum(mask)
    return loss

def masked_accuracy(label, pred):
    pred = tf.argmax(pred, axis = 2)
    label = tf.cast(label, pred.dtype)
    match = label == pred
    mask = label != 0
    match = match & mask
    match = tf.cast(match, dtype = tf.float32)
    mask = tf.cast(mask, dtype = tf.float32)
    return tf.reduce_sum(match)/tf.reduce_sum(mask)

READING THE DATA

In [2]:
with open('start_fens.txt', 'r') as s:
    start_fens = [line.strip() for line in s.readlines()]
with open('themes.txt', 'r') as t:
    themes = [line.strip() for line in t.readlines()]
with open('moves.pkl', 'rb') as m:
    moves = pickle.load(m)
with open('moves_san.pkl', 'rb') as ms:
    moves_san = pickle.load(ms)

In [3]:
moves_san = []
for i in range(len(start_fens)):
    board = chess.Board(start_fens[i])
    moves_san_i = []
    for move in moves[i]:
        san = board.san(move)
        san = san[:-1] if san[-1] in ['+', '#'] else san
        moves_san_i.append(san)
        board.push(move)
    moves_san.append(moves_san_i)

In [4]:
with open('moves_san_nosym.pkl', 'wb') as ms:
    pickle.dump(moves_san, ms)

In [20]:
indices = [i for i in range(len(start_fens)) if len(themes[i].split(' ')) <= 8 and len(moves[i]) < 9] #filtering the data based on analysis

TOKENIZING THE DECODER I/O (ONLY ON TRAINING DATA)

In [21]:
#Note: vocab_size and seq_length are to be determined after careful examination of the dataset (done above)!
vocab_size = 37 #tbd from data (themes)
seq_len_dec = 8 #tbd from data
#seq_len_enc = 9 #tbd from data
seq_len_enc = 8
vectorizer = tf.keras.layers.TextVectorization(
    max_tokens = vocab_size,
    standardize = None,
    split = "whitespace",
    output_mode = "int",
    output_sequence_length = seq_len_dec + 1
)

TRAIN-TEST SPLIT

In [22]:
ind_train, ind_test = train_test_split(indices, test_size = 0.25, shuffle = True, random_state = 42)

In [23]:
themes_train = [themes[i] for i in ind_train]
vectorizer.adapt(themes_train) #fitting the vectorizer on the training themes data

PREPARING THE DATASET

In [24]:
# # train and validation data generator
# #def data_generator(X_enc, X_dec, indices, seq_len, key_dim, vectorizer, batch_size = 32):
# def data_generator(start_fens, moves, themes, indices, seq_len_enc, vectorizer, batch_size = 32):
#     # vectorizer is the tokenization object for the decoder input/output
#     z = [0]*768
#     while True:
#         for start in range(0, len(indices), batch_size):
#             batch_indices = indices[start: start + batch_size]
#             batch_X_enc = []
#             batch_X_dec = []
#             batch_Y = []
#             for i in batch_indices:
#                 # shape(x) = (batch_size, seq_len, embed_dim)
#                 fens = [start_fens[i]]
#                 board = chess.Board(start_fens[i])
#                 for move in moves[i]:
#                     board.push(move)
#                     fens.append(board.fen())
#                 batch_X_enc_i = [embed_fen(fen) for fen in fens]
#                 batch_X_enc_i += [z]*(seq_len_enc - len(fens)) #encoder input
#                 batch_X_enc.append(batch_X_enc_i)
#                 vect_i = vectorizer(themes[i]).numpy()
#                 batch_X_dec.append(vect_i[:-1]) #decoder input
#                 batch_Y.append(vect_i[1:]) #decoder target (including the end sentinel shifted right by 1)
#             yield [np.array(batch_X_enc), np.array(batch_X_dec)], np.array(batch_Y)

In [76]:
# train and validation data generator
#def data_generator(X_enc, X_dec, indices, seq_len, key_dim, vectorizer, batch_size = 32):
def data_generator(moves_san, themes, indices, seq_len_enc, key_dim, embed_model, vectorizer, batch_size = 32):
    # vectorizer is the tokenization object for the decoder input/output
    z = [0]*key_dim
    while True:
        for start in range(0, len(indices), batch_size):
            batch_indices = indices[start: start + batch_size]
            batch_X_enc = []
            batch_X_dec = []
            batch_Y = []
            for i in batch_indices:
                # shape(x) = (batch_size, seq_len, embed_dim)
                batch_X_enc_i = []
                for move in moves_san[i]:
                    if move[-1] == '#':
                        move = move[:-1]
                    try:
                        batch_X_enc_i.append(embed_model.wv[move])
                    except:
                        break
                #batch_X_enc_i = [embed_model.wv[move] for move in moves_san[i]]
                batch_X_enc_i += [z]*(seq_len_enc - len(batch_X_enc_i)) #encoder input
                batch_X_enc.append(batch_X_enc_i)
                vect_i = vectorizer(themes[i]).numpy()
                batch_X_dec.append(vect_i[:-1]) #decoder input
                batch_Y.append(vect_i[1:]) #decoder target (including the end sentinel shifted right by 1)
            yield [np.array(batch_X_enc), np.array(batch_X_dec)], np.array(batch_Y)

TRAINING THE MODEL

In [26]:
#embed_model = tf.keras.models.load_model('fen_embed.h5')

In [85]:
embed_model = Word2Vec.load('move2vec2.bin')

In [90]:
#comb tried = [4,8,192,512,0.1]
num_layers = 4
num_heads = 8
key_dim = 20
ff_dim = 64
dropout = 0.1
model = transformer(num_layers, num_heads, seq_len_enc, seq_len_dec, key_dim, ff_dim, vocab_size, embed_model, dropout)
lr = CustomSchedule(key_dim)
optimizer = tf.keras.optimizers.Adam(lr, beta_1 = 0.9, beta_2 = 0.98, epsilon = 1e-9)
model.compile(loss = masked_loss, optimizer = optimizer, metrics = [masked_accuracy])

SAMPLE DATA

In [34]:
# v = vectorizer(themes_train[:1000]).numpy()
# Y = v[:, 1:]
# X_dec = v[:, :-1]
# X_enc = []
# z = [0]*768
# for i in ind_train[:1000]:
#   fens = [start_fens[i]]
#   board = chess.Board(start_fens[i])
#   for move in moves[i]:
#       board.push(move)
#       fens.append(board.fen())
#   batch_i_enc = [embed_fen(fen) for fen in fens]
#   batch_i_enc += [z]*(seq_len_enc - len(fens))
#   print(np.array(batch_i_enc).shape, len(fens))
#   X_enc.append(batch_i_enc)
# X = [np.array(X_enc), X_dec]

In [92]:
#input = [encoder input, decoder input]
#target = [decoder output]
batch_size = 64
epochs = 1
steps_per_epoch = len(ind_train[:100000]) // batch_size
#steps_per_epoch = 1000 // batch_size
train_data_generator = data_generator(moves_san, themes, ind_train[:100000], seq_len_enc, key_dim, embed_model, vectorizer, batch_size)
model.fit(train_data_generator, steps_per_epoch = steps_per_epoch, epochs = epochs)
#model.fit(X, Y, epochs = 1, batch_size = 32)
train_data_generator.close()



In [None]:
model.save('trans_model2.keras')

LOADING THE MODEL

In [None]:
custom_objects = {"PositionalEmbeddingDecoder": PositionalEmbeddingDecoder,
                  "PositionalEmbeddingEncoder": PositionalEmbeddingEncoder,
                  "CustomSchedule": CustomSchedule,
                  "masked_loss": masked_loss,
                  "masked_accuracy": masked_accuracy}
with tf.keras.utils.custom_object_scope(custom_objects):
    model_loaded = tf.keras.models.load_model("trans_model2.keras")

MODEL EVALUATION

In [31]:
# def tagging(start_fen, moves, seq_len_enc, vectorizer, transformer_model):
#   z = [0] * 768
#   lookup = list(vectorizer.get_vocabulary())
#   start_sentinel, end_sentinel = '[start]', '[end]'
#   output = [start_sentinel]
#   for i in range(seq_len_enc):
#     fens = [start_fen]
#     board = chess.Board(start_fen)
#     for move in moves:
#         board.push(move)
#         fens.append(board.fen())
#     X_enc = [embed_fen(fen) for fen in fens]
#     X_enc += [z] * (seq_len_enc - len(fens))
#     vector = vectorizer(' '.join(output)).numpy()
#     X_dec = vector[:-1]
#     pred = transformer_model.predict([np.array([X_enc]), np.array([X_dec])])
#     tag = lookup[np.argmax(pred[0, i, :])]
#     print(tag)
#     output.append(tag)
#     if tag == end_sentinel:
#       break
#   return output

In [80]:
def tagging(moves_san, seq_len_enc, key_dim, vectorizer, embed_model, transformer_model):
    z = [0] * key_dim
    lookup = list(vectorizer.get_vocabulary())
    start_sentinel, end_sentinel = '[start]', '[end]'
    output = [start_sentinel]
    X_enc = []
    for move in moves_san:
            if move[-1] == '#':
                move = move[:-1]
            try:
                X_enc.append(embed_model.mv[move])
            except:
                break
    #X_enc = [embed_fen(fen) for fen in fens]
    X_enc += [z] * (seq_len_enc - len(X_enc))
    for i in range(seq_len_enc):
        vector = vectorizer(' '.join(output)).numpy()
        X_dec = vector[:-1]
        pred = transformer_model.predict([np.array([X_enc]), np.array([X_dec])])
        tag = lookup[np.argmax(pred[0, i, :])]
        print(tag)
        output.append(tag)
        if tag == end_sentinel:
          break
    return output

In [81]:
themes[ind_test[1]]

'[start] defensiveMove [end]'

In [94]:
# tagging(start_fens[ind_test[100]], moves[ind_test[10]], seq_len_enc, vectorizer, model)
tagging(moves_san[ind_test[10]], seq_len_enc, key_dim, vectorizer, embed_model, model)

forcedMate
[end]


['[start]', 'forcedMate', '[end]']