In [1]:
# Read the notebook's stylesheet file and hand its contents to IPython's HTML
# display object; as the last expression of the cell, HTML(css) is what gets rendered.
from IPython.core.display import HTML
with open('./style.css') as f:
    css = f.read()
HTML(css)

<h1>Building a Tablebase</h1>

In [20]:
import chess                                       # Simulate the chess game
import chess.gaviota                               # Load Gaviota Tablebase
from IPython.display import display, clear_output  # Better visualization and display of the chess board
import random                                      # Random moves and random creation of endgame positions
from typing import Union, List, Set                # Types to enable direct method signatures
import re     
import sqlite3
import pickle
import os.path

In [21]:
# Directory names, relative to the notebook's working directory.
DIRECTORIES = {
    # Pickled position sets, read by load_s_if_exists and written by save_s.
    'S_SETS': 's_sets',
    # Same directory name the finished tablebase pickles are stored under ("./tables/...").
    'TABLEBASES': 'tables'
}

<h2>Board representation</h2>
<div style="text-align: justify">
In order to build a large tablebase capable of storing hundreds of thousands of possible board positions, it is important to choose an effective representation of a board to store. At first, one possibility might be to store the board using the FEN-string notation, however, strings use a large amount of memory. Encoding a single board in a smaller datatype, makes it possible for us to do computations with larger datasets in memory. We do this by implementing two helper functions <b><code>board_to_int</code></b> and <b><code>int_to_board</code></b>. They enable us to encode a chess board given the current endgame as <b><code>pieces_str</code></b> as an integer and vice versa.
<br/><br/>
<h3>Encoding</h3>
<div style="text-align: justify">
The basic idea is to assign a byte within a python bytearray to each piece on the board. The value held by the byte is the position of the piece, where each field is given a value between 0 and 63. The <b><code>piece_map</code></b> method within python-chess returns a dictionary containing each piece and its field. We shall use this dictionary to iterate over the chess pieces and assign the bytes. If a piece has been captured, the corresponding byte will be set to <b>0xff</b>. The last byte will hold the turn information, which is likewise stored as either one or zero. Finally, the resulting bytearray is encoded as a single integer and returned. 
<br/><br/>
<b>Remark:</b> Python does in fact store all integers in at least 28 bytes, which makes further optimisation of the encoding unnecessary. 
</div>

In [22]:
def board_to_int(board, pieces_str):
    """Encode a board as a single integer, one byte per entry of ``pieces_str``.

    Byte ``i`` holds the square index of the piece named by ``pieces_str[i]``,
    or ``0xFF`` if no such piece is left on the board. One additional trailing
    byte stores ``board.turn``. The bytes are interpreted as a little-endian
    integer.

    Args:
        board: Object offering ``piece_map()`` and ``turn`` (a python-chess
            board in this notebook).
        pieces_str: Piece symbols that fix the byte order, e.g. ``'Kkr'``.

    Returns:
        int: The encoded position including the side to move.
    """
    unassigned = board.piece_map()
    encoded = bytearray(len(pieces_str) + 1)
    for slot, symbol in enumerate(pieces_str):
        # First square (in piece_map order) still holding a piece of this kind.
        square = next(
            (sq for sq, occupant in unassigned.items()
             if occupant == chess.Piece.from_symbol(symbol)),
            None,
        )
        if square is None:
            encoded[slot] = 0xFF
        else:
            # Drop the square so a repeated symbol picks the next matching piece.
            del unassigned[square]
            encoded[slot] = square
    encoded[-1] = board.turn
    return int.from_bytes(encoded, 'little')

<h3>Decoding</h3>
<div style="text-align: justify">
Similarly, to decode an integer, the bytearray is transformed back into a piece mapping. The resulting dictionary can be used to initialize a new chess board. 
</div>

In [23]:
def int_to_board(representation, pieces_str):
    """Decode an integer produced by ``board_to_int`` back into a board.

    The integer is unpacked into ``len(pieces_str) + 1`` little-endian bytes.
    Byte ``i`` is the square of the piece ``pieces_str[i]``; a value of
    ``0xFF`` marks a piece that is not on the board and is skipped. The last
    byte carries the side to move.

    Args:
        representation: Integer encoding of a position.
        pieces_str: The pieces string the integer was encoded with.

    Returns:
        A ``chess.Board`` holding the decoded pieces and turn.

    Raises:
        OverflowError: If ``representation`` does not fit into
            ``len(pieces_str) + 1`` bytes.
    """
    board = chess.Board(None)
    byte_representation = representation.to_bytes(len(pieces_str) + 1, "little")
    # Build the mapping without the 0xFF placeholders. Removing them from the
    # dict while iterating over it raises
    # "RuntimeError: dictionary changed size during iteration".
    mapping = {
        square: chess.Piece.from_symbol(piece_symbol)
        for square, piece_symbol in zip(byte_representation[:-1], pieces_str)
        if square != 0xFF
    }
    board.set_piece_map(mapping)
    # Store the turn as a bool, matching chess.WHITE / chess.BLACK.
    board.turn = bool(byte_representation[-1])
    return board

To demonstrate this, we will create a board from a given FEN string, encode it as an integer and then transform it back into FEN representation.

In [24]:
# Round trip: FEN -> board -> integer -> board -> FEN.
fen = '3K4/8/4k3/8/8/8/8/2r5 b - - 0 1'
board = chess.Board(fen)
# 'Kkr' fixes the byte order of the encoding; the same string is needed to decode.
representation = board_to_int(board, 'Kkr')
new_board = int_to_board(representation, 'Kkr')
print(f"FEN to INT: '{fen}' -> {representation}")
print(f"INT to FEN: {representation} -> '{new_board.fen()}'")

FEN to INT: '3K4/8/4k3/8/8/8/8/2r5 b - - 0 1' -> 142395
INT to FEN: 142395 -> '3K4/8/4k3/8/8/8/8/2r5 b - - 0 1'


<h2>Retrograde Analysis</h2>
<div style="text-align: justify">
We shall use the following algorithm to calculate the depth-to-mate for every possible board within a given endgame. 
</div>
<br/><br/>
<h3>Calculating All Possible Permutations</h3>
<div style="text-align: justify">
Initially we have to calculate a set <b><code>s</code></b> containing all legal positions for a given endgame. We do this by iterating each chess piece over each chess field from 0-63. The helper function <b><code>shift_positions</code></b> takes a list containing the position of each piece on the board and will shift a single piece one field further, in order to generate the next permutation. 
</div>

In [25]:
def shift_positions(positions):
    """Advance ``positions`` in place to the next permutation and return it.

    The list behaves like an odometer with digits 0..63: the last entry is
    incremented; every entry that already is 63 wraps to 0 and carries into
    the entry before it. A list consisting only of 63s wraps to all zeros.

    Args:
        positions: List of square indices, modified in place.

    Returns:
        The same list object that was passed in.
    """
    index = len(positions) - 1
    while index >= 0:
        if positions[index] != 63:
            positions[index] += 1
            break
        # Wrap this piece around and carry into the previous one.
        positions[index] = 0
        index -= 1
    return positions

If a piece has reached the end of the board, it will be set back to the beginning and increment the next piece. 

In [26]:
# The two trailing entries are at 63, so both wrap to 0 and the carry increments the entry before them.
shift_positions([0, 1, 63, 63])

[0, 2, 0, 0]

The <b><code>generate_s</code></b> function receives a string of pieces for which it generates the set <b><code>s</code></b> containing all legal positions. The algorithm iterates over each turn (black and white) and further iterates over all permutations of field positions. Each permutation is mapped onto the pieces string and loaded onto a board. Next, we must check if it is a valid board using the <b><code>is_valid</code></b> method of the chess library. If so, we can encode the board as an int and add it to the set <b><code>s</code></b>. 
<br/><br/>
<b>Remark:</b> Within this function we included print statements showing the current progress as the generation process may take several minutes. This has been done for several of the following functions as well. 

In [27]:
def split_into_substrs(pieces_str):
    """Return the pieces strings needed to cover an endgame.

    The result always contains ``pieces_str`` itself. For a four-piece string
    it additionally contains every string obtained by removing the first
    occurrence of one non-king piece.

    Args:
        pieces_str: Piece symbols of the endgame, e.g. ``'KQkr'``.

    Returns:
        set[str]: ``pieces_str`` plus, for four pieces, its three-piece
        sub-endgames.
    """
    result = {pieces_str}
    if len(pieces_str) != 4:
        return result
    for symbol in pieces_str:
        if symbol in 'Kk':
            # Kings can never be captured, so they are never removed.
            continue
        result.add(pieces_str.replace(symbol, '', 1))
    return result

In [28]:
# Show, for a range of three- and four-piece endgames, which pieces strings split_into_substrs returns.
endgames = ['KRk', 'KQk', 'KBBk', 'KNNk', 'KBNk', 'KQkr', 'Kkr', 'Kkq', 'Kkbb', 'Kknn', 'Kkbn', 'KRkq']
for e in endgames:
    # "{:^4}" centers the endgame name in a four-character column.
    print("{:^4} => {}".format(e, str(split_into_substrs(e))))

KRk  => {'KRk'}
KQk  => {'KQk'}
KBBk => {'KBk', 'KBBk'}
KNNk => {'KNNk', 'KNk'}
KBNk => {'KBNk', 'KBk', 'KNk'}
KQkr => {'KQk', 'KQkr', 'Kkr'}
Kkr  => {'Kkr'}
Kkq  => {'Kkq'}
Kkbb => {'Kkb', 'Kkbb'}
Kknn => {'Kknn', 'Kkn'}
Kkbn => {'Kkb', 'Kkn', 'Kkbn'}
KRkq => {'KRk', 'Kkq', 'KRkq'}


In [29]:
def load_s_if_exists(pieces_str):
    """Load the cached position set for ``pieces_str`` if a cache file exists.

    Args:
        pieces_str: Pieces string; also used as the cache file name inside
            the ``DIRECTORIES["S_SETS"]`` directory.

    Returns:
        The unpickled object from the cache file, or an empty set when the
        file does not exist.
    """
    path = f'./{DIRECTORIES["S_SETS"]}/{pieces_str}'
    if not os.path.exists(path):
        return set()
    # NOTE: pickle can execute arbitrary code; only load cache files you created yourself.
    with open(path, 'rb') as cache_file:
        print(f"Loading s from {path}")
        return pickle.load(cache_file)

In [30]:
def save_s(pieces_str, s):
    """Pickle the position set ``s`` into the S-set cache directory.

    The cache directory is created if it does not exist yet, so the first run
    in a fresh checkout does not fail with ``FileNotFoundError``.

    Args:
        pieces_str: Pieces string; used as the cache file name.
        s: The object (a set of encoded positions) to pickle.
    """
    directory = f'./{DIRECTORIES["S_SETS"]}'
    os.makedirs(directory, exist_ok=True)
    path = f'{directory}/{pieces_str}'
    with open(path, 'wb') as f:
        pickle.dump(s, f)

In [31]:
def generate_s(pieces_str):
    """Build (or load from cache) the set of all positions of an endgame.

    For a four-piece endgame the set also covers the three-piece endgames that
    arise after a capture (see ``split_into_substrs``). Freshly generated
    four-piece sets are written to the cache.

    Args:
        pieces_str: Piece symbols of the endgame, e.g. ``'KQkr'``.

    Returns:
        set: Integer encodings (relative to ``pieces_str``) of all positions.
    """
    s = load_s_if_exists(pieces_str)
    if s == set():
        substrs = split_into_substrs(pieces_str)
        print(f"{pieces_str} needs the following substrs: {substrs}")
        for substr in substrs:
            sub_s = generate_sub_s(substr)
            if substr == pieces_str:
                # set.union() returns a new set and leaves `s` untouched;
                # update() is needed to actually collect the positions.
                s.update(sub_s)
            else:
                # generate_sub_s encodes with the shorter `substr`. Re-encode
                # with the full pieces string so the missing piece becomes a
                # 0xFF byte and the value matches what board_to_int yields
                # for this endgame after a capture.
                s.update(
                    board_to_int(int_to_board(rep, substr), pieces_str)
                    for rep in sub_s
                )
        if len(pieces_str) == 4:
            save_s(pieces_str, s)
    return s

In [32]:
def generate_sub_s(pieces_str):
    """Enumerate all valid positions for exactly the pieces in ``pieces_str``.

    A cached set is returned if ``load_s_if_exists`` finds one. Otherwise
    every assignment of squares 0..63 to the pieces is tried for both sides to
    move; assignments that put all pieces on distinct squares and pass
    ``board.is_valid()`` are encoded with ``board_to_int`` and collected.
    Freshly generated three-piece sets are written to the cache.

    Args:
        pieces_str: Piece symbols, e.g. ``'KRk'``.

    Returns:
        set: Integer encodings (relative to ``pieces_str``) of the positions.
    """
    print(f"[+] Generating {pieces_str}")

    sub_s = load_s_if_exists(pieces_str)
    if sub_s != set():
        # Cache hit: nothing to generate and nothing to save.
        return sub_s

    piece_count = len(pieces_str)
    last_permutation = [63] * piece_count
    board = chess.Board(None)
    pieces = [chess.Piece.from_symbol(symbol) for symbol in pieces_str]

    for turn in (chess.WHITE, chess.BLACK):
        board.turn = turn
        positions = [0] * piece_count
        # The all-63 permutation ends the loop without being examined.
        while positions != last_permutation:
            if positions[-1] == 63 and positions[-2] == 63:
                # Progress is derived from the square of the first piece
                # (0..63 for White to move, 63..126 for Black to move).
                leading = positions[0] if turn == chess.WHITE else positions[0]+63
                print("\r", end="")
                print(f"{positions} -> {int(100/126*leading)}%", end="")

            board.set_piece_map(dict(zip(positions, pieces)))
            if board.is_valid() and len(set(positions)) == piece_count:
                sub_s.add(board_to_int(board, pieces_str))
            positions = shift_positions(positions)
            board.clear_board()

    if piece_count == 3:
        save_s(pieces_str, sub_s)

    return sub_s



<div style="text-align: justify">
(b) Next, all positions in the set S are searched for those in which the player to move
       is checkmated.  These positions are collected in a set S_0.  In addition, these
       positions are removed from the set S.
</div>
<br>
S: All valid positions, both with White to move and with Black to move
<br>
S0: All positions in which the player to move is checkmated
<br>
S1: All positions in which the player to move can checkmate the opponent in a single move
<br>
S2: All positions in which the player to move only has moves that lead into S1
<br>
Sn (n is odd): All positions in which the player to move has a move that leads into Sn-1
<br>
Sn (n is even): All positions in which the player to move only has moves that lead into Sn-1

<h3>Calculating All Mate Positions</h3>
<div style="text-align: justify">
Next we want to calculate all positions within <b><code>s</code></b> in which the player whose turn it is, is checkmate. These positions are subtracted from the set <b><code>s</code></b> and are stored in the set <b><code>s_0</code></b>. We can easily probe for checkmate using the <b><code>is_checkmate</code></b> method of the chess library. 
</div>

In [33]:
def generate_mate_positions(s, pieces_str):
    """Split off the positions in which the side to move is checkmated.

    Args:
        s: Collection of encoded positions.
        pieces_str: Pieces string the positions were encoded with.

    Returns:
        tuple: ``(remaining, s_0)`` where ``s_0`` is the set of checkmate
        positions and ``remaining`` is ``set(s)`` without them.
    """
    total = len(s)
    s_0 = set()
    for index, representation in enumerate(s):
        if index % 1000 == 0:
            # Overwrite the previous progress line.
            print("\r", end="")
            print(f"{index}/{total} -> {int((index/total)*100)}%", end="")
        if int_to_board(representation, pieces_str).is_checkmate():
            s_0.add(representation)
    return set(s) - s_0, s_0

<h2>The Basic Algorithm</h2>
<div style="text-align: justify">
The idea is to continue to develop the next set <b><code>s_{n+1}</code></b> and subtract it from <b><code>s</code></b> until either the next set is empty, or <b><code>s</code></b> is empty. We will then have a series of sets <b><code>s_n</code></b> where every possible chess board position is either in one of <b><code>s_n</code></b> or in <b><code>s</code></b>. We can then define the depth-to-mate (<b>DTM</b>) value of each position as the value of <b><code>n</code></b> if a position is in the set <b><code>s_n</code></b>. This is the minimum number of moves required to force checkmate on the other player (given an optimal strategy). If a position is still in <b><code>s</code></b>, the player whose turn it is can not force a checkmate on the other player in this position. 
</div>

<br>
<h3>Developing Sets With An Odd DTM</h3>
<div style="text-align: justify">
We already have <b><code>s_0</code></b>. Our next goal is to calculate all positions within <b><code>s</code></b> in which a player can reach a position within <b><code>s_0</code></b> in a single move. We will store these positions in the set <b><code>s_1</code></b>. These are positions in which the player whose turn it is can immediately checkmate the other player in his next move. 
</div>
<br>
<div style="text-align: justify">
Now we want to do this for every next set <b><code>s_n</code></b> where n is odd. These are the sets in which it is the winning player's turn, as he will set his opponent checkmate from a position in <b><code>s_1</code></b>. 
</div>

In [34]:
def generate_s_odd(s, s_prev, pieces_str):
    """Collect the positions that have at least one move leading into ``s_prev``.

    Args:
        s: Set of encoded positions that are not classified yet.
        s_prev: The previously generated set ``s_{n-1}``.
        pieces_str: Pieces string the positions are encoded with.

    Returns:
        tuple: ``(remaining, s_next)`` where ``s_next`` holds the newly
        classified positions and ``remaining`` is ``s`` without them.
    """
    s_next = set()
    total = len(s)

    for index, representation in enumerate(s):
        if index % 1000 == 0:
            print("\r", end="")
            print(f"{index}/{total} -> {int((index/total)*100)}%", end="")

        board = int_to_board(representation, pieces_str)
        for move in board.legal_moves:
            # Try the move, look up the resulting position, then undo it.
            board.push(move)
            reaches_prev = board_to_int(board, pieces_str) in s_prev
            board.pop()

            if reaches_prev:
                # One such move is enough; record the (restored) position.
                s_next.add(board_to_int(board, pieces_str))
                break

    return s - s_next, s_next

<div style="text-align: justify">
The function <b><code>generate_s_odd</code></b> takes three arguments:
<ul>
<li>The initial set <b><code>s</code></b> containing all positions which have not been placed in previous sets yet</li>
<li>The previous set <b><code>s_prev</code></b> => <code>s_{n-1}</code></li>
<li>A string <b><code>pieces_str</code></b> which contains all pieces which are currently on the board</li>
</ul>
We will then iterate over all positions in the set <b><code>s</code></b>. For each position we will iterate over the next possible legal moves, which we can query using the built-in <b><code>.legal_moves</code></b> attribute. Each move will be applied to the current position. The resulting position will then be looked up in the previous set <b><code>s_prev</code></b>, to determine if the current move leads to a position with a lower depth-to-mate. As soon as such a position is found, we will revert the move and add the original position to our resulting set <b><code>s_next</code></b>. The algorithm will then move on to the next position in <b><code>s</code></b>. Finally, we can subtract <b><code>s_next</code></b> from <b><code>s</code></b> and return both. 
</div>
<br>
<br>
<h3>Developing Sets With An Even DTM</h3>
<div style="text-align: justify">
The next set we develop will have an even depth-to-mate value. This means, it is the turn of the player trying to defend against a checkmate. For this reason we may only place positions in this set, in which the defending player will result in a position with a lower DTM, regardless of their move. This means we are forcing the player to move closer to his checkmate. 
</div>


In [35]:
def generate_s_even(s, l_odd, pieces_str):
    """Collect the positions in which every legal move leads into an odd set.

    A position qualifies only if it has at least one legal move and each of
    its moves results in a position contained in one of the sets in ``l_odd``.

    Args:
        s: Set of encoded positions that are not classified yet.
        l_odd: List of the previously generated sets ``s_n`` with odd ``n``.
        pieces_str: Pieces string the positions are encoded with.

    Returns:
        tuple: ``(remaining, s_next)`` where ``s_next`` holds the newly
        classified positions and ``remaining`` is ``s`` without them.
    """
    s_next = set()
    total = len(s)

    for i, representation in enumerate(s):

        if i % 1000 == 0:
            print("\r", end="")
            print(f"{i}/{total} -> {int((i/total)*100)}%", end="")

        board = int_to_board(representation, pieces_str)
        legal_moves = board.legal_moves

        # A position without legal moves (e.g. stalemate) never qualifies.
        forced = bool(legal_moves)

        for move in legal_moves:
            board.push(move)
            # Encode the successor once instead of once per odd set, and undo
            # the move right away so the board is restored on every path.
            successor = board_to_int(board, pieces_str)
            board.pop()

            if not any(successor in s_odd for s_odd in l_odd):
                forced = False
                break

        if forced:
            s_next.add(board_to_int(board, pieces_str))

    return s - s_next, s_next

<div style="text-align: justify">
The function <b><code>generate_s_even</code></b> takes three arguments:
<ul>
<li>The initial set <b><code>s</code></b> containing all positions which have not been placed in previous sets yet</li>
<li>A list  <b><code>l_odd</code></b> of all previous sets <b><code>s_n</code></b> where  <b><code>n</code></b> is odd.</li>
<li>A string <b><code>pieces_str</code></b> which contains all pieces which are currently on the board</li>
</ul>
We will then once again iterate over all positions in the set <b><code>s</code></b> and over each of the next possible legal moves from that position. Each move will be applied to the current position. We will then check if the move leads to a position that is contained in at least one of the previous sets <b><code>s_odd</code></b> in our list <b><code>l_odd</code></b>. If we find a single move for a position in which this is not true, we will not add that position to our new set <b><code>s_next</code></b> and move on to the next position. Only if we have made sure that every move leads to a lower DTM may we add the position to <b><code>s_next</code></b>. 
Finally, we can subtract <b><code>s_next</code></b> from <b><code>s</code></b> and return both. 
</div>
<br>
<b>Remark: </b> We will also make sure that we do not end in a stalemate, in which the player whose turn it is does not have any legal moves left. 
<br>
<br>
<h3>Tablebase Generation</h3>
<div style="text-align: justify">
Before we can go ahead and put it all together we have to consider how we want to store our sets. We chose the <b><code>pickle</code></b> module to write our list of sets as a single binary file to disk. 
</div>


In [36]:
def save_sets_as_binary(list_of_sets, pieces_str):
    """Pickle the finished list of sets into the tablebase directory.

    The directory name is taken from ``DIRECTORIES['TABLEBASES']`` (which is
    ``'tables'``, i.e. the same location as before) and the directory is
    created if it does not exist yet.

    Args:
        list_of_sets: List whose entry ``n`` is the set ``s_n``.
        pieces_str: Pieces string; used as the file name.
    """
    directory = f"./{DIRECTORIES['TABLEBASES']}"
    os.makedirs(directory, exist_ok=True)
    with open(f"{directory}/{pieces_str}", "wb") as f:
        pickle.dump(list_of_sets, f)

<div style="text-align: justify">
The final main function will now progressively develop our series of sets. Currently we are storing all sets in memory before the function has terminated, which can take up close to 5GB of RAM for a 4-man tablebase. The execution speed varies from machine to machine, but a 3-man tablebase can be created in under 24h using the free Deepnote cloud computing service. 
</div>

In [37]:
def main(pieces_str):
    """Generate the tablebase for ``pieces_str`` and store it on disk.

    Starting from the set of all positions, the sets ``s_0``, ``s_1``, ...
    are generated one after another until a generated set is empty. The
    resulting list (entry ``n`` is ``s_n``) is passed to
    ``save_sets_as_binary``.

    Args:
        pieces_str: Piece symbols of the endgame, e.g. ``'KRk'``.
    """
    divider = "--------------------------------------------------------------------------------------------"

    print("Doing all positions")
    s = generate_s(pieces_str)
    print(f"\nLength of s = {len(s)}")
    print(divider)

    list_of_sets = []

    while True:
        # Entry n of the list is s_n, so the next index is the list length.
        n = len(list_of_sets)
        print(f"Doing s_{n}")

        if n == 0:
            s, s_n = generate_mate_positions(s, pieces_str)
        elif n % 2:
            # Odd n: a single move into the directly preceding set suffices.
            s, s_n = generate_s_odd(s, list_of_sets[-1], pieces_str)
        else:
            # Even n: every move must lead into one of the odd sets.
            s, s_n = generate_s_even(s, list_of_sets[1::2], pieces_str)

        if not s_n:
            break

        list_of_sets.append(s_n)

        print(f"\nLength of s_{n} = {len(s_n)}")
        print(f"New length of s = {len(s)}")
        print(divider)

    save_sets_as_binary(list_of_sets, pieces_str)

<div style="text-align: justify">
In order to generate a tablebase we only need a string <b><code>pieces_str</code></b> specifying the pieces involved in the endgame situation. 
<br>
Initially, we will build our set <b><code>s</code></b> containing all possible board situations using our function <b><code>generate_s</code></b>. 
<br>
Then we will enter the main loop, where we shall use <b><code>n</code></b> as a counter to develop the next set <b><code>s_n</code></b>. At the start of each loop we have three cases:
<ol>
<li>In the first run, <b><code>n</code></b> will be zero. This means <b><code>s_n</code></b> and <b><code>s</code></b> will be generated using the <b><code>generate_mate_positions</code></b> function</li>
<li>If <b><code>n</code></b> is odd, we will generate <b><code>s_n</code></b> and <b><code>s</code></b> using the function <b><code>generate_s_odd</code></b> and supply the last item in our <b><code>list_of_sets</code></b> as the argument</li>
<li>If <b><code>n</code></b> is even, we will generate <b><code>s_n</code></b> and <b><code>s</code></b> using the function <b><code>generate_s_even</code></b> and supply every second item in our <b><code>list_of_sets</code></b> as the argument</li>
</ol>
At the end of each loop, we will append the calculated set <b><code>s_n</code></b> to our  <b><code>list_of_sets</code></b> and lastly increment <b><code>n</code></b>. If, however, a newly calculated set <b><code>s_n</code></b> is empty, we have found our last set and will break out of the main loop. Finally, we will store the resulting list of sets using our store function <b><code>save_sets_as_binary</code></b>
</div>

In [38]:
#main('Kknb')

<h2>Testing the Tablebase</h2>
<div style="text-align: justify">
We have now created a tablebase, which allows us to determine the depth-to-mate (DTM) for any given position of an endgame situation. Now we would like to verify our results and compare them to the Gaviota tablebase. We can do this by verifying that for every possible board position for which the Gaviota DTM equals our calculated DTM, the position is in fact stored within the correct set. Then we will also have to verify the other side: For every position in our list of sets, the Gaviota DTM must equal our calculated DTM.  
</div>

In [39]:
def test_dtm_with_gaviota(test_set, expected_dtm, pieces_str):

    with chess.gaviota.open_tablebase("./gaviota") as tablebase:
        
        #Check if every set in test set is in gaviota
        for rep in test_set:
            board = int_to_board(rep, pieces_str)
            gaviota_dtm = abs(tablebase.probe_dtm(board))
            if not gaviota_dtm == expected_dtm:
                    with open(f'./tests/{pieces_str}', 'a') as f3:
                        f3.write(f"Assertion failed for board {board} with gaviota dtm = {gaviota_dtm} but expected = {expected_dtm}\n")
            assert gaviota_dtm == expected_dtm, f"FAILED: {board.epd()} -> expected_dtm ({expected_dtm}) != gaviota_dtm ({gaviota_dtm})"
        

<div style="text-align: justify">
Our testing process makes use of the <b><code>test_dtm_with_gaviota</code></b> function. It receives three arguments:
<ul>
<li>A set <b><code>test_set</code></b> which is the set currently under test. It contains all positions we calculated to have a specific DTM</li>
<li>The value <b><code>expected_dtm</code></b> which is the DTM we calculated for all positions within <b><code>test_set</code></b></li>
<li>A string <b><code>pieces_str</code></b> which contains all pieces of the endgame and is needed to decode the positions</li>
</ul>
Next we will open the Gaviota tablebase using the chess library. Each position is then checked using an assert statement. 
<br>
At the end, the function will only have run without an error if, for each position in <b><code>test_set</code></b>, the absolute value of the Gaviota DTM of that position equals <b><code>expected_dtm</code></b>. The positions that remain in <b><code>s</code></b> and are not part of any set are checked separately by <b><code>test_dtm_zero</code></b>.

</div>

In [40]:
def test_dtm_zero(list_of_sets, s, pieces_str):
    with chess.gaviota.open_tablebase("./gaviota") as tablebase:
        c = 0
        for s_n in list_of_sets:
            s -= s_n
        
        for pos in s:
            c += 1
            board = int_to_board(pos, pieces_str)
            gaviota_dtm = tablebase.probe_dtm(board)
            assert gaviota_dtm == 0, f"FAILED: {board.epd()} -> expected_dtm ({0}) != gaviota_dtm ({gaviota_dtm})"
                
        print(f"\nFound {c} non-checkmate positions with DTM zero")

<div style="text-align: justify">
In order to test our tablebase we first have to generate <b><code>s</code></b>, which may take a minute or two. We can then load our sets into memory using <b><code>pickle</code></b>. The code below will iterate over all sets and call the <b><code>test_dtm_with_gaviota</code></b> function for every single one. 
</div>

In [41]:
def test_all_sets(pieces_str):
    """Compare the stored tablebase for ``pieces_str`` with Gaviota.

    The set of all positions is loaded from ``./tests/s_set_<pieces_str>`` if
    that file exists; otherwise it is generated and written there. The list of
    sets is loaded from ``./tables/<pieces_str>``. First the unclassified
    positions are checked with ``test_dtm_zero``, then each set ``s_i`` is
    checked with ``test_dtm_with_gaviota``; every passed set is logged to
    ``./tests/<pieces_str>``.

    Args:
        pieces_str: Piece symbols of the endgame, e.g. ``'KRkq'``.
    """
    s_cache_path = f'./tests/s_set_{pieces_str}'
    # NOTE: pickle can execute arbitrary code; only load files you created yourself.
    if os.path.exists(s_cache_path):
        with open(s_cache_path, 'rb') as f:
            s = pickle.load(f)
            print(f"Loading s for {pieces_str}")
    else:
        s = generate_s(pieces_str)
        with open(s_cache_path, 'wb') as f:
            pickle.dump(s, f)
    print(len(s))

    with open(f'./tables/{pieces_str}', 'rb') as f:
        list_of_sets = pickle.load(f)
        print(f"Loading list of sets for {pieces_str}")

    print("Testing for DTM = 0")
    # Positions of s that are in none of the sets must have Gaviota DTM = 0.
    test_dtm_zero(list_of_sets, s, pieces_str)

    # Entry i of the list is expected to hold exactly the positions with DTM i.
    for i, s_i in enumerate(list_of_sets):
        print(f"Testing dtm values for set s_{i}...")

        test_dtm_with_gaviota(s_i, i, pieces_str)

        with open(f'./tests/{pieces_str}', 'a') as f3:
            f3.write(f"s_{i} passed\n")
        print(f"s_{i} passed")

In [42]:
# Run the full Gaviota comparison for the KRkq tablebase.
# NOTE(review): the output recorded for this cell ends in an AssertionError raised during the DTM = 0 check.
test_all_sets("KRkq")

Loading s for KRkq
19733336
Loading list of sets for KRkq
Testing for DTM = 0


AssertionError: FAILED: 1k4R1/8/8/8/4q3/8/8/3K4 b - - -> expected_dtm (0) != gaviota_dtm (53)

In [None]:
# Load the pickled list of sets for the KRkq endgame for manual inspection.
with open(f'./tables/KRkq', 'rb') as f:
        list_of_sets = pickle.load(f)

In [None]:
# Number of sets stored in the KRkq tablebase.
len(list_of_sets)

In [None]:
# Pick out the entries at index 0 and 3 (main() appends s_n at index n).
s_0 = list_of_sets[0]
s_3 = list_of_sets[3]

In [None]:
# Number of positions in s_3.
len(s_3)

In [None]:
# Cross-check s_3 against Gaviota, decoding each entry with the "KRkq" pieces string.
with chess.gaviota.open_tablebase("./gaviota") as tablebase:
    one = 0  # positions whose absolute Gaviota DTM is 3
    two = 0  # positions whose absolute Gaviota DTM is 1
    for p in s_3:
        board = int_to_board(p, "KRkq")
        gaviota_dtm = tablebase.probe_dtm(board)
        if abs(gaviota_dtm) == 3:
            one += 1
        elif abs(gaviota_dtm) == 1:
            two += 1
# Prints how many entries of s_3 do NOT have |DTM| == 3, and how many have |DTM| == 1.
print(len(s_3)-one, two)
        