In [1]:
from glob import glob
from itertools import product
import json
import os
from pathlib import Path
import random
import re

import chess
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import tensorflow as tf

In [2]:
# Register tqdm's progress-bar variants (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()

In [3]:
# Change the working directory to the parent folder; the relative
# "notebooks/..." data paths used later are resolved from there.
# NOTE(review): not idempotent — re-running this cell climbs one more level.
%cd ..

/Users/robertperrotta/github/reconchess-tools


In [4]:
from reconchess.utilities import add_pawn_queen_promotion
from reconchess_tools.utilities import simulate_sense, simulate_move, possible_requested_moves, without_opponent_pieces
from reconchess_tools.strategy import non_dominated_sense_by_own_pieces

In [5]:
num_channels = 19

# Channel indices of the board-feature tensor, numbered consecutively from 0
# in the order listed below.
(
    # Own pieces: channel index is piece_type - 1
    MY_PAWNS,
    MY_KNIGHTS,
    MY_BISHOPS,
    MY_ROOKS,
    MY_QUEENS,
    MY_KING,
    # Own castling rights, flagged on the rook squares
    MY_CASTLING_RIGHTS,
    # Sense result: which squares were seen, then opponent pieces found there
    # (channel index is piece_type + 7)
    WAS_SENSED,
    OP_PAWNS_SEEN,
    OP_KNIGHTS_SEEN,
    OP_BISHOPS_SEEN,
    OP_ROOKS_SEEN,
    OP_QUEENS_SEEN,
    OP_KING_SEEN,
    # Own move: from-square, requested to-square, actual to-square, capture square
    REQUESTED_FROM_SQUARE,
    REQUESTED_TO_SQUARE,
    TAKEN_TO_SQUARE,
    CAPTURE_SQUARE,
    # Square on which the opponent captured one of our pieces
    OP_CAPTURE_SQUARE,
) = range(num_channels)

In [6]:
# Square-index offsets (to-square minus from-square) for every encoded move type.
# NOTE(review): presumably squares are numbered 0..63 with 8 per rank, so
# +/-1 is a file step, +/-8 a rank step and +/-7, +/-9 the diagonals — confirm.
_SLIDE_STEPS = (-1, -7, -8, -9, 1, 9, 8, 7)
_KNIGHT_JUMPS = (-10, -17, -15, -6, 10, 17, 15, 6)

DIFFS = (
    # Pass
    [0]
    # Sliding moves: each of the eight directions at distance 1, then distance 2, ... up to 7
    + [step * distance for distance, step in product(range(1, 8), _SLIDE_STEPS)]
    # Knight moves
    + list(_KNIGHT_JUMPS)
)
num_move_types = len(DIFFS)

In [24]:
# Pick one transition file to inspect. glob() returns matches in arbitrary,
# filesystem-dependent order, so sort first to make the choice reproducible.
file = sorted(glob("notebooks/white_transitions/*.pkl"))[0]
file

'notebooks/white_transitions/39799.pkl'

In [25]:
# Load the transition table stored in the selected pickle and display it.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
df = pd.read_pickle(file)
df

Unnamed: 0,mask,action,stack,mask after,is_terminal,future payout
0,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
1,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,-1
2,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
3,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,-1
4,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
...,...,...,...,...,...,...
68,"[[[True, False, False, False, False, True, Fal...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
69,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, True, Fal...",False,-1
70,"[[[True, False, False, False, False, True, Fal...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
71,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, True, Fal...",False,-1


In [111]:
# Turn the per-transition columns of `df` into batch arrays.
pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
terminal_flags = df["is_terminal"]

is_over = np.array(terminal_flags)
# The payout only counts as a reward on the terminal transition; zero elsewhere.
reward = np.array(terminal_flags * df["future payout"])
mask, action, mask_after = (
    np.stack(df[column]) for column in ("mask", "action", "mask after")
)
# Observation histories have different lengths; pad_sequences pads them to a
# common length (Keras defaults: zeros, added at the front).
# The "before" state is each history without its most recent entry.
stack = pad_sequences([history[:-1] for history in df["stack"]])
stack_after = pad_sequences(df["stack"])

reward.shape, mask.shape, action.shape, mask_after.shape, stack.shape, stack_after.shape

((73,),
 (73, 8, 8, 66),
 (73, 8, 8, 66),
 (73, 8, 8, 66),
 (73, 72, 8, 8, 19),
 (73, 73, 8, 8, 19))

In [100]:
# Critic: maps an observation history plus a legal-action mask to one value per
# (square, move type) action, with illegal actions zeroed out.
t_stack = tf.keras.Input((None, 8, 8, num_channels))
t_mask = tf.keras.Input((8, 8, num_move_types + 1))

x = t_stack
# A time step is real data if any entry of its 8x8xC board is non-zero;
# all-zero steps are treated as padding and masked out of the recurrence.
padding_mask = tf.math.reduce_any(t_stack != 0, axis=[-3, -2, -1])

# The ConvLSTM summarises the whole history into a single feature map.
# (An earlier variant used TimeDistributed Conv2D layers followed by an LSTM.)
x = tf.keras.layers.ConvLSTM2D(128, 3)(x, mask=padding_mask)

# Each convolution needs a non-linearity: without one, the five stacked
# layers collapse into a single linear map and add no modelling capacity.
for _ in range(5):
    x = tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")(x)
x = tf.keras.layers.Flatten()(x)
for n in [64, 32]:
    x = tf.keras.layers.Dense(n, "relu")(x)
x = tf.keras.layers.Dense(8 * 8 * (num_move_types + 1))(x)
# Multiplying by the mask forces the value of every illegal action to zero.
value = tf.keras.layers.Reshape((8, 8, num_move_types + 1))(x) * t_mask

critic = tf.keras.Model([t_stack, t_mask], value)

critic.summary()

Model: "model_13"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_105 (InputLayer)          [(None, None, 8, 8,  0                                            
__________________________________________________________________________________________________
tf.__operators__.ne_30 (TFOpLam (None, None, 8, 8, 1 0           input_105[0][0]                  
__________________________________________________________________________________________________
tf.math.reduce_any_31 (TFOpLamb (None, None)         0           tf.__operators__.ne_30[0][0]     
__________________________________________________________________________________________________
conv_lst_m2d_27 (ConvLSTM2D)    (None, 6, 6, 128)    677888      input_105[0][0]                  
                                                                 tf.math.reduce_any_31[0][0

In [101]:
# Actor: maps an observation history plus a legal-action mask to a probability
# distribution over all (square, move type) actions.
t_stack = tf.keras.Input((None, 8, 8, num_channels))
t_mask = tf.keras.Input((8, 8, num_move_types + 1))

x = t_stack
# A time step is real data if any entry of its 8x8xC board is non-zero;
# all-zero steps are treated as padding and masked out of the recurrence.
padding_mask = tf.math.reduce_any(t_stack != 0, axis=[-3, -2, -1])

# The ConvLSTM summarises the whole history into a single feature map.
# (An earlier variant used TimeDistributed Conv2D layers followed by an LSTM.)
x = tf.keras.layers.ConvLSTM2D(128, 3)(x, mask=padding_mask)

# Each convolution needs a non-linearity: without one, the five stacked
# layers collapse into a single linear map and add no modelling capacity.
for _ in range(5):
    x = tf.keras.layers.Conv2D(64, 3, padding="same", activation="relu")(x)
x = tf.keras.layers.Flatten()(x)
for n in [64, 32]:
    x = tf.keras.layers.Dense(n, "relu")(x)
x = tf.keras.layers.Dense(8 * 8 * (num_move_types + 1))(x)
logits = tf.keras.layers.Reshape((8, 8, num_move_types + 1))(x)
# Softmax jointly over squares and move types; the mask removes illegal
# actions so that probability mass is spread over legal ones only.
policy = tf.keras.layers.Softmax(axis=[-3, -2, -1])(logits, t_mask)

actor = tf.keras.Model([t_stack, t_mask], policy)

actor.summary()

Model: "model_14"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_107 (InputLayer)          [(None, None, 8, 8,  0                                            
__________________________________________________________________________________________________
tf.__operators__.ne_31 (TFOpLam (None, None, 8, 8, 1 0           input_107[0][0]                  
__________________________________________________________________________________________________
tf.math.reduce_any_32 (TFOpLamb (None, None)         0           tf.__operators__.ne_31[0][0]     
__________________________________________________________________________________________________
conv_lst_m2d_28 (ConvLSTM2D)    (None, 6, 6, 128)    677888      input_107[0][0]                  
                                                                 tf.math.reduce_any_32[0][0

In [107]:
# Legal-action mask of transition 1 for the last action channel, shown as 0/1.
# NOTE(review): the extra channel beyond the move types presumably encodes the
# sense action — confirm.
mask[1, ..., -1].astype(int)

array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1, 1, 0, 0],
       [0, 1, 1, 1, 1, 1, 1, 0],
       [0, 1, 1, 1, 1, 1, 1, 0],
       [0, 1, 1, 1, 1, 1, 1, 0],
       [0, 1, 1, 1, 1, 1, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]])

In [102]:
# Actor output for transition 1, last action channel, expressed in percent and
# truncated (not rounded) to two decimals.
actor_last_channel = actor.predict([stack, mask])[1, ..., -1]
(actor_last_channel * 10000).astype(int) / 100

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 3.7 , 3.7 , 3.7 , 0.  , 0.  ],
       [0.  , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 0.  ],
       [0.  , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 3.69, 0.  ],
       [0.  , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 0.  ],
       [0.  , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 3.7 , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ]])

In [105]:
# Critic output for transition 1, last action channel, multiplied by 100 and
# truncated (not rounded) to two decimals.
critic_last_channel = critic.predict([stack, mask])[1, ..., -1]
(critic_last_channel * 10000).astype(int) / 100

array([[ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  , -0.02, -0.01,  0.05,  0.  ,  0.  ],
       [ 0.  , -0.03,  0.04, -0.07, -0.03, -0.05,  0.01,  0.  ],
       [ 0.  ,  0.  ,  0.03,  0.02,  0.03, -0.02,  0.06,  0.  ],
       [ 0.  ,  0.05, -0.05, -0.02,  0.05,  0.02,  0.07,  0.  ],
       [ 0.  , -0.02, -0.05,  0.01,  0.  ,  0.09,  0.04,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ]])

In [116]:
discount = 0.95

# Inputs describing the transition (s, a, r, done) ...
t_stack = tf.keras.Input((None, 8, 8, num_channels))
t_mask = tf.keras.Input((8, 8, num_move_types + 1))
t_action = tf.keras.Input((8, 8, num_move_types + 1))
t_is_over = tf.keras.Input((1,))
t_reward = tf.keras.Input((1,))
# ... and the successor state s' that the TD target bootstraps from.
t_stack_after = tf.keras.Input((None, 8, 8, num_channels))
t_mask_after = tf.keras.Input((8, 8, num_move_types + 1))

policy = actor([t_stack, t_mask])
state_action_values = critic([t_stack, t_mask])
policy_after = actor([t_stack_after, t_mask_after])
state_action_values_after = critic([t_stack_after, t_mask_after])

# Drop the trailing unit axis: combining a (batch, 1) tensor with the (batch,)
# value estimate would silently broadcast to (batch, batch).
is_over_flat = tf.squeeze(t_is_over, axis=-1)
reward_flat = tf.squeeze(t_reward, axis=-1)

# V(s') = sum_a pi(a | s') * Q(s', a): expected value of the successor state.
est_state_value = tf.math.reduce_sum(
    policy_after * state_action_values_after, axis=[-3, -2, -1]
)
# TD target r + discount * V(s'), with no bootstrap on terminal transitions.
# Gradients are stopped so the loss only moves Q(s, a) towards the target.
state_value = tf.stop_gradient(
    est_state_value * discount * (1 - is_over_flat) + reward_flat
)
# Q(s, a) for the action taken; `t_action` multiplies the critic output
# (presumably a one-hot encoding of the chosen action — confirm).
state_action_value = tf.math.reduce_sum(t_action * state_action_values, axis=[-3, -2, -1])

value_loss = tf.keras.losses.Huber()(state_value, state_action_value)

value_loss

<KerasTensor: shape=() dtype=float32 (created by layer 'tf.cast_4')>

In [124]:
# Sparse encoding of the padded stack: only its non-zero entries are stored.
sparse = tf.sparse.from_dense(stack)
sparse

<tensorflow.python.framework.sparse_tensor.SparseTensor at 0x7fb881c24250>

In [137]:
# Check that sparsifying row by row and concatenating (letting the non-batch
# dimensions grow to the largest row) gives the same shape as sparsifying the
# already padded dense array.
row_wise_sparse = tf.sparse.concat(
    0,
    [tf.sparse.from_dense(history[None, :-1]) for history in df["stack"]],
    expand_nonconcat_dims=True,
)
sparse.shape, row_wise_sparse.shape

(TensorShape([73, 72, 8, 8, 19]), TensorShape([73, 72, 8, 8, 19]))

In [None]:
def sparsify(series):
    """Stack the items of `series` into one sparse tensor along a new leading axis.

    Each item gets a leading axis of length 1 and is converted to a sparse
    tensor. The pieces are then concatenated along that axis;
    `expand_nonconcat_dims=True` lets the remaining dimensions differ between
    items, with the result sized to the largest one.
    """
    row_tensors = []
    for item in series:
        row_tensors.append(tf.sparse.from_dense(item[None]))
    return tf.sparse.concat(0, row_tensors, expand_nonconcat_dims=True)


# Per-file sparse pieces of every training array; they are concatenated into
# single tensors in a later cell.
is_over = []
reward = []
mask = []
action = []
mask_after = []
stack = []
stack_after = []

# glob() returns files in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the assembled dataset reproducible between runs.
for file in tqdm(sorted(glob("notebooks/white_transitions/*.pkl"))):
    # NOTE: unpickling can execute arbitrary code — only load files you trust.
    df = pd.read_pickle(file)

    is_over.append(tf.sparse.from_dense(df["is_terminal"]))
    # The payout only counts as a reward on the terminal transition.
    reward.append(tf.sparse.from_dense(df["is_terminal"] * df["future payout"]))
    mask.append(sparsify(df["mask"]))
    action.append(sparsify(df["action"]))
    mask_after.append(sparsify(df["mask after"]))
    # The "before" state is the observation history without its newest entry.
    stack.append(sparsify([x[:-1] for x in df["stack"]]))
    stack_after.append(sparsify(df["stack"]))

In [144]:
def _concat_rows(parts):
    """Join per-file sparse tensors along axis 0, growing the other dims to the largest."""
    return tf.sparse.concat(0, parts, expand_nonconcat_dims=True)


# Collapse each list of per-file pieces into one dataset-wide sparse tensor.
# NOTE(review): the names are rebound from lists to tensors, so this cell
# cannot be re-run without first re-running the loading cell.
is_over, reward, mask, action, mask_after, stack, stack_after = [
    _concat_rows(parts)
    for parts in (is_over, reward, mask, action, mask_after, stack, stack_after)
]

reward.shape, mask.shape, action.shape, mask_after.shape, stack.shape, stack_after.shape

(TensorShape([28089]),
 TensorShape([28089, 8, 8, 66]),
 TensorShape([28089, 8, 8, 66]),
 TensorShape([28089, 8, 8, 66]),
 TensorShape([28089, 790, 8, 8, 19]),
 TensorShape([28089, 791, 8, 8, 19]))

Unnamed: 0,mask,action,stack,mask after,is_terminal,future payout
0,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
1,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, False, Fa...",False,-1
2,"[[[True, False, False, False, False, False, Fa...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
3,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, True, Fal...",False,-1
4,"[[[True, False, False, False, False, True, Fal...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
5,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, True, Fal...",False,-1
6,"[[[True, False, False, False, False, True, Fal...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
7,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, True, Fal...",False,-1
8,"[[[True, False, False, False, False, True, Fal...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[False, False, False, False, False, False, F...",False,-1
9,"[[[False, False, False, False, False, False, F...","[[[False, False, False, False, False, False, F...",[[[[False False False True False False True ...,"[[[True, False, False, False, False, True, Fal...",False,-1
