First, import libraries and define functions (to hang out in global scope).

In [None]:
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

import numpy as np
import matplotlib.pyplot as plt

import csv
from collections import defaultdict

def plot_weights(model):
    """Print and plot every 2-D kernel slice of each 4-D weight layer in *model*.

    For each layer whose first weight array ``W`` is 4-D, every slice
    ``W[:, :, i, j]`` is printed and drawn as a grayscale image. The subplot
    grid has one row per index along axis 2 and one column per index along
    axis 3. One figure is shown per plotted layer.

    Layers that have no weights, or whose first weight array is not 4-D,
    are skipped rather than raising ``IndexError``.

    Args:
        model: object exposing ``layers``, each with a ``get_weights()``
            method returning a list of arrays.
    """
    for layer in model.layers:
        weights = layer.get_weights()
        # A weightless layer returns an empty list, and a kernel with fewer
        # than four axes cannot be indexed as W[:, :, i, j] below.
        if not weights or np.ndim(weights[0]) != 4:
            continue

        W = weights[0]
        n_rows, n_cols = W.shape[2], W.shape[3]

        for i in range(n_rows):
            for j in range(n_cols):
                kernel = W[:, :, i, j]
                print(kernel)
                # Subplot indices are 1-based and run row by row.
                plt.subplot(n_rows, n_cols, 1 + j + i * n_cols)
                # Affine shift that maps -1 to 0 and +1 to 1 before display.
                plt.imshow((kernel + 1) / 2, cmap="gray")
        plt.show()

def chunks(l, n):
    """Yield consecutive slices of *l* of length *n*; the last one may be shorter."""
    yield from (l[start:start + n] for start in range(0, len(l), n))

def show_board(board):
    """Print *board* as text: one line per row, one character per square.

    Each square is an indexable sequence of flags. The first truthy flag
    among indices 2..7 selects the letter (K, Q, R, N, B, P in that order);
    if none is set the square prints as '.'. A truthy flag at index 1
    lower-cases the letter.
    """
    # (flag index, letter) pairs, listed in the order they are tested.
    symbols = ((2, 'K'), (3, 'Q'), (4, 'R'), (5, 'N'), (6, 'B'), (7, 'P'))

    for rank in board:
        for square in rank:
            glyph = next((ch for idx, ch in symbols if square[idx]), '.')
            print(glyph.lower() if square[1] else glyph, end='')
        print()

def show_moves(moves):
    """Print a move grid: 'X' where a cell's first element is truthy, '.' otherwise.

    One output line is written per row of *moves*.
    """
    for rank in moves:
        for cell in rank:
            print('X' if cell[0] else '.', end='')
        print()

# Progress marker: printed once the imports and helper definitions above have run.
print("Done importing.")

Next, define handcrafted filters and data representation specifications.

In [None]:
# Hand-crafted 3x3 single-channel kernels, keyed by name.
# 'neutral' is all zeros. 'posN' holds a single 1 at row r, column c with
# N = r + 3*c, i.e. N counts down a column before moving to the next one.
FILTERS = {
    'neutral': np.zeros((3, 3, 1)),
}
# Column-outer / row-inner iteration fixes the insertion order pos0..pos8.
for col in range(3):
    for row in range(3):
        # Named `kernel` (not `filter`) so the builtin filter() is not shadowed.
        kernel = np.zeros((3, 3, 1))
        kernel[row][col] = 1
        FILTERS['pos' + str(row + col * 3)] = kernel
        
        
# def make_weight(filters):
    

# WEIGHTS = {
#     'whitebishop': make_weight({'white':'pos4','bishop':'pos4'})
    
#     [np.array(
#             #   Wh   Bl    K    Q    R    N    B    P   mv   ep
#             [[[[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]]],
#              [[[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 2],[ 0],[ 0],[ 0],[ 0],[ 0],[ 2],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]]],
#              [[[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]]]]
#         ),
#         np.array([-3])],
#     'pos0_whitebishop': [np.array(
#             #   Wh   Bl    K    Q    R    N    B    P   mv   ep   wb
#             [[[[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 1]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]]],
#              [[[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]]],
#              [[[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]],
#               [[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0],[ 0]]]]
#         ),
#         np.array([0])],
# }

# WEIGHTS['whitebishop'] = 

# Number of leading samples used for training; later samples are held out.
TRAINING_SIZE = 19000
# Upper bound on the number of CSV rows read when importing the data.
TOTAL_SIZE = 31000

# One-hot encoding of the piece letters, in the order K, Q, R, N, B, P.
piece_to_id = {
    letter: [1 if slot == index else 0 for slot in range(6)]
    for index, letter in enumerate('KQRNBP')
}

# Names of the per-square channels, in channel order. The list is extended
# at runtime when new derived features are appended.
LAYERS = [
    'white', 'black',
    'king', 'queen', 'rook', 'knight', 'bishop', 'pawn',
    'has_moved', 'enpassant',
]

# Memo of evaluation scores, keyed by '<filter>_<layer>' strings.
cache = defaultdict(str)

print("Ok.")

Import training data!

In [None]:
# Build the training tensors from 'moves.csv' ('|'-delimited).
#
# Columns as used below:
#   0: player flag (only rows where it equals 1 are kept)
#   1: 64 comma-separated squares, each empty or a 1-based piece id
#   2: comma-separated piece letters, indexed by piece id
#   3: comma-separated has-moved flags ('0' means not moved), indexed by piece id
#   4: optional 1-based id of the piece flagged for en passant
#   5: 64 comma-separated destination flags ('0' means not a destination)
#
# Each square becomes 10 values: 2 color flags + 6 one-hot piece flags
# + has_moved + enpassant. Positions without a bishop on the board are skipped.
X = []
Y = []

# Color flags by 0-based piece id: ids 0-15 get [1,0], ids 16-31 get [0,1].
# Hoisted out of the loop because it never changes.
colors = [[1, 0]] * 16 + [[0, 1]] * 16
bishop = piece_to_id['B']

# `with` guarantees the file is closed even if a row fails to parse;
# newline='' is what the csv module documentation recommends for readers.
with open('moves.csv', newline='') as moves_file:
    for i, line in enumerate(csv.reader(moves_file, delimiter='|'), start=1):
        if i > TOTAL_SIZE:
            break

        if int(line[0]) != 1:
            continue

        pieces = [piece_to_id[char] for char in line[2].split(',')]
        has_moved = [([1] if flag != '0' else [0]) for flag in line[3].split(',')]
        # The inner [0] is shared, but entries are only ever replaced, never mutated.
        enpassant = [[0]] * 32
        if line[4]:
            enpassant[int(line[4]) - 1] = [1]

        found_bishop = False
        squares = []
        for row in chunks(line[1].split(','), 8):
            rank = []
            for cell in row:
                if not cell:
                    square = [0, 0] + [0] * 6 + [0] + [0]
                else:
                    piece_index = int(cell) - 1
                    if pieces[piece_index] == bishop:
                        found_bishop = True
                    square = (colors[piece_index] + pieces[piece_index]
                              + has_moved[piece_index] + enpassant[piece_index])
                rank.append(square)
            squares.append(rank)

        if not found_bishop:
            continue

        X.append(squares)
        Y.append([
            [(1 if flag != '0' else 0) for flag in dest_row]
            for dest_row in chunks(line[5].split(','), 8)
        ])

X = np.array(X).astype('float32')
Y = np.array(Y)
# Add a trailing axis so Y has one channel per square.
Y = np.expand_dims(Y, 3)

print(len(X))

The heart of the matter! Find new features to add to the board representation.

In [None]:
# Greedy feature search.
#
# A single 3x3 convolution with hand-set weights is applied to one channel of
# X at a time and scored against Y. The best (filter, channel) pair's
# thresholded output is appended to X as a new channel, and its name
# '<filter>_<layer>' is appended to LAYERS. The loop repeats until no pair
# improves on the best accuracy seen so far (or 'neutral_white' gets selected).
model = Sequential()
model.add(Conv2D(1, (3, 3), padding='same', activation='relu', input_shape=(8,8,1)))

model.compile(
    loss=keras.losses.mean_squared_error,
    metrics=['accuracy', keras.metrics.TruePositives()]
)

best_accuracy = 0
best_TPR = 0
best_filter = ''
best_property = None
while 'neutral_white' not in LAYERS:
    # One single-channel view of X per entry in LAYERS; X only changes at the
    # end of an iteration, so the split is done once per pass.
    channels = np.split(X, X.shape[3], 3)

    for key, kernel in FILTERS.items():
        # Load the hand-crafted kernel (bias 0) into the probe convolution.
        model.layers[0].set_weights([
                np.expand_dims(np.array(kernel), 3),
                np.array([0])
        ])
        # plot_weights(model)

        for i, X_slice in enumerate(channels):
            candidate = key+'_'+LAYERS[i]
            if candidate in LAYERS:
                print("Already considering", candidate)
                continue
            if candidate not in cache:
                cache[candidate] = model.evaluate(X_slice, Y, verbose=0)
                print(key, LAYERS[i], 'accuracy:', cache[candidate][1])
            score = cache[candidate]
#             print(key, LAYERS[i], 'TPR:', score[2])
            # The small epsilon requires a strict improvement, not a float tie.
            if best_accuracy + 0.00000001 < score[1]:
                best_accuracy = score[1]
                best_property = i
                best_filter = key
            # The all-zero kernel gives the same output for every channel,
            # so one evaluation is enough.
            if key=='neutral':
                break

    # Stop when nothing ever beat the initial accuracy (best_property is still
    # None, which would make LAYERS[best_property] raise TypeError) or when
    # the best pair is one that has already been appended.
    if best_property is None or best_filter+'_'+LAYERS[best_property] in LAYERS:
        print("Could not improve result with basic filters.")
        break

    print(best_filter, 'got an accuracy of', best_accuracy, 'on', LAYERS[best_property])

    model.layers[0].set_weights([
        np.expand_dims(np.array(FILTERS[best_filter]), 3),
        np.array([0])
    ])

    Y_prob = (model.predict(channels[best_property]) > .5).astype('int')

    # Show the first position where the new feature disagrees with the target.
    for i, y_prob in enumerate(Y_prob):
        if not (y_prob == Y[i]).all():
            show_board(X[i])
            print()
            show_moves(y_prob)
            print()
            show_moves(Y[i])
            print('-----')
            break

    # Cast to X's dtype so concatenation does not promote X to a wider float.
    X = np.concatenate((X, Y_prob.astype(X.dtype)), 3)
    LAYERS.append(best_filter+'_'+LAYERS[best_property])
    print("Appended", best_filter+'_'+LAYERS[best_property], 'to LAYERS')

Use the generated board representation to train a simple network that finds the final answer.

In [None]:
import os

# Split by position: the first TRAINING_SIZE samples train the model, the
# remainder is used as validation data.
# NOTE(review): if fewer than TRAINING_SIZE samples were loaded, the held-out
# set is empty - confirm against the printed len(X).
X_train = X[:TRAINING_SIZE]
Y_train = Y[:TRAINING_SIZE]
X_test  = X[TRAINING_SIZE:]
Y_test  = Y[TRAINING_SIZE:]

# Two stacked 3x3 convolutions: 5 feature maps, then 1 output map per square.
model = Sequential()
model.add(Conv2D(5, (3, 3), padding='same', activation='relu', input_shape=X[0].shape))
model.add(Conv2D(1, (3, 3), padding='same', activation='relu'))

model.compile(
    loss=keras.losses.mean_squared_error,
#     optimizer=keras.optimizers.Adam(
#         learning_rate=0.1,
#         beta_1=0.9,
#         beta_2=0.999,
#         amsgrad=False,
#     ),
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer=keras.optimizers.SGD(learning_rate=0.1, nesterov=True),
#     optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy']
)

# Resume from an earlier checkpoint when one exists; on a first run there is
# no file yet, so the freshly built model above is trained instead of crashing.
# NOTE(review): a loaded checkpoint replaces the model built above, so its
# input shape must match the current X - confirm after changing LAYERS.
if os.path.exists('best_model.h5'):
    model = load_model('best_model.h5')

history = model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=50,
    verbose=1,
    validation_data=(X_test, Y_test),
    shuffle=True,
    callbacks = [
        # Keep only the weights with the lowest validation loss on disk.
        ModelCheckpoint(
            filepath='best_model.h5',
            monitor='val_loss',
            save_best_only=True,
            verbose=1,
        )]
)

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

Display positions that the model fails on, with the predicted and correct output.

In [None]:
# Threshold the model output at 0.5 to get a 0/1 prediction per square.
Y_prob = (model.predict(X_test) > .5).astype('int')

# Print every held-out position whose prediction differs from the target
# anywhere: the board, then the predicted grid, then the expected grid.
for position, predicted, expected in zip(X_test, Y_prob, Y_test):
    if (predicted == expected).all():
        continue

    show_board(position)
    print()
    show_moves(predicted)
    print()
    show_moves(expected)
    print('-----')

Some numbers: print and plot the learned convolution kernels.

In [None]:
# Print and plot the kernel slices of every layer of the current `model`.
plot_weights(model)