In [1]:
import os
import chess.pgn
from tqdm.notebook import tqdm
import json
import chess
import subprocess
from lib import bitMapFile, boardToBitMap

In [2]:
# Folder the PGN files are read from; created up front if it is missing
output_dir = "./PGN/"
os.makedirs(output_dir, exist_ok=True)

# Base URL
base_url = "http://www.bookuppro.com/ecopgn/"

# File names are built from one letter followed by a zero-padded two-digit number
letters = "ABCDE"
numbers = [str(i).zfill(2) for i in range(100)]  # '00' to '99'

In [3]:
def parse_pgn_file(file_path):
    """Read every game stored in a PGN file.

    Returns a list of the parsed games in file order. A path that does not
    exist yields an empty list instead of raising.
    """
    if not os.path.exists(file_path):
        return []

    def _read_all(handle):
        # A None result from read_game marks the end of the stream
        game = chess.pgn.read_game(handle)
        while game is not None:
            yield game
            game = chess.pgn.read_game(handle)

    with open(file_path, 'r') as pgn_file:
        return list(_read_all(pgn_file))


def get_moves_as_uci(game):
    """Return the main line of ``game`` as a list of UCI move strings."""
    uci_moves = []
    for move in game.mainline_moves():
        uci_moves.append(move.uci())
    return uci_moves

In [4]:
# NOTE(review): this cell is disabled. It appears to build the per-game
# {"result", "moves"} records that the notebook later loads from
# openings.json - confirm; the step that writes the JSON is not shown here.
# Result encoding used below: 1 = "1-0", -1 = "0-1", 0 = "1/2-1/2",
# None = any other Result header.

# allMoves = {}
# gameCount = 0

# pb = tqdm( total=len(letters)*len(numbers) )
# for letter in letters:
#     for num in numbers:
#         pb.update(1)
#         file_path = f"./PGN/{letter}{num}.pgn"
#         games = parse_pgn_file(file_path)
#         for idx, game in enumerate(games):

#             result_str = game.headers.get("Result", "")
#             if result_str == "1-0":
#                 result = 1
#             elif result_str == "0-1":
#                 result = -1
#             elif result_str == "1/2-1/2":
#                 result = 0
#             else:
#                 result = None

#             uciMoves = get_moves_as_uci(game)
#             if uciMoves:
#                 allMoves[ gameCount ] = {
#                     "result": result,
#                     "moves": uciMoves
#                 }
#                 gameCount += 1

In [5]:
# Load the game records. The file holds a JSON object (it is consumed via
# openings.values()), each value carrying a 'moves' list and a 'result'.
# The encoding is pinned so the read does not depend on the platform default.
with open("openings.json", "r", encoding="utf-8") as f:
    openings = json.load(f)

In [6]:
class Trie:
    """One node of a move trie.

    A node stores the move that leads to it, three integer counters
    (`white`, `black`, `total`) that the trie-building code maintains, and a
    dict of child nodes keyed by move.
    """

    def __init__( self, move ):
        """Create a node for `move` with zeroed counters and no children."""
        self.move = move
        self.white = self.black = self.total = 0
        self.children = {}

    def childrenSummary( self ):
        """Return `{ move: { "white", "black", "total" } }` for every direct child."""
        return {
            move: {
                "white": child.white,
                "black": child.black,
                "total": child.total,
            }
            for move, child in self.children.items()
        }

    def combinedChildrenSummary( self, existingSummary ):
        """Add this node's children summary into `existingSummary`.

        Counters of moves already present are summed; unseen moves are
        inserted. `existingSummary` is modified in place and also returned.
        """
        for move, stats in self.childrenSummary().items():
            if move in existingSummary:
                merged = existingSummary[ move ]
                for key in ( "white", "black", "total" ):
                    merged[ key ] += stats[ key ]
            else:
                existingSummary[ move ] = stats
        return existingSummary

    def __str__( self ):
        """Multi-line description: move, child moves and the three counters."""
        return f"MOVE: { self.move }:\nChildren: { list( self.children ) }\nWhite: {self.white}\nBlack: {self.black}\nTotal: {self.total}"

# Counters updated while the trie is built: moves inserted, and nodes created
# (the node counter starts at 1, presumably to account for the root node)
totalMoves = 0
totalNodes = 1

In [7]:
# Openings rarely run past ~30 half-moves; only the first 31 plies of each
# game are inserted into the trie.
MAX_OPENING_PLIES = 31

# Reset the counters together with the trie so that re-running this cell
# does not double count (the node counter starts at 1 for the root).
totalMoves, totalNodes = 0, 1

root = Trie( 'root' )
root.total = float( 'inf' )  # the root satisfies any `total >= threshold` check
currNode = root
print(root)

# tqdm takes its total from len() of the values view, so no intermediate list is needed
for game in tqdm( openings.values() ):
    moves, result = game[ 'moves' ], game[ 'result' ]
    for move in moves[ :MAX_OPENING_PLIES ]:
        # Reuse the child for this move if it exists, otherwise create it
        target = currNode.children.get( move )
        if target is None:
            target = Trie( move )
            currNode.children[ move ] = target
            totalNodes += 1
        # result: 1 = game won by white, -1 = game won by black; anything else only counts towards total
        if result == 1: target.white += 1
        elif result == -1: target.black += 1  # count black wins upwards, not downwards
        target.total += 1
        currNode = target
        totalMoves += 1
    # Start the next game from the root again
    currNode = root

MOVE: root:
Children: []
White: 0
Black: 0
Total: inf


  0%|          | 0/1087171 [00:00<?, ?it/s]

In [8]:
# Display the number of moves inserted and the number of trie nodes after the build
totalMoves, totalNodes

(33142925, 16656191)

In [9]:
# Maps a position hash to the per-move statistics of that position's children
openingDB = {}
# Minimum `total` a trie node needs before its position is queued for hashing
TOTAL_THRESHOLD = 2

def processQueue( queue ):
    """Hash every queued position and merge its move statistics into `openingDB`.

    `queue` is a list of `( bitmap, node )` pairs. Each bitmap is written to
    `./tmp/<index>.json`, the external hasher is run on that directory, and
    the i-th hash it reports is paired with the i-th queue entry. The node's
    children summary is then stored under that hash, or merged into the entry
    already stored there.

    The temporary files are removed even when hashing fails. The queue is
    emptied in place only after the database has been updated. An empty queue
    is a no-op.

    Raises AssertionError when the number of hashes does not match the number
    of queued positions.
    """
    if not queue:
        # Nothing to hash; skip the file writes and the external process
        return

    os.makedirs( './tmp', exist_ok=True )
    paths = [ f'./tmp/{i}.json' for i in range( len( queue ) ) ]

    try:
        # Save files
        for path, ( bitmap, _ ) in zip( paths, queue ):
            bitMapFile( path, bitMap=bitmap, isRead=False )

        # Get hashes
        # NOTE(review): assumes the binary prints one line per file, in index
        # order, with the hash as the last whitespace-separated token - confirm
        # against mystic-bot.
        output = subprocess.run(["../target/release/mystic-bot", "./tmp"], capture_output=True)
        lines = output.stdout.decode('utf-8').splitlines()
        hashes = [ int( line.split()[ -1 ] ) for line in lines if line.strip() ]

        assert len( hashes ) == len( queue ), (
            f"expected { len( queue ) } hashes, got { len( hashes ) } "
            f"(exit code { output.returncode }): { output.stderr.decode( 'utf-8', errors='replace' ) }"
        )
    finally:
        # Remove stale files, including after a failed run
        for path in paths:
            if os.path.exists( path ):
                os.remove( path )

    # Update DB
    for positionHash, ( _, node ) in zip( hashes, queue ):
        if positionHash in openingDB:
            # Same position reached through another move order: add the counters
            openingDB[ positionHash ] = node.combinedChildrenSummary( openingDB[ positionHash ] )
        else:
            openingDB[ positionHash ] = node.childrenSummary()
            if len( openingDB ) % 10000 == 0:
                print( f"OpeningDB Size: { len( openingDB ) }" )

    queue.clear()

def openingHelper( currNode, nodeList, pb, hashingQueue ):
    """Walk the trie depth-first and queue positions for hashing.

    currNode     -- node being visited
    nodeList     -- path from the root to `currNode` (root first); used to
                    replay the moves on a fresh board
    pb           -- progress bar; `update( 1 )` is called once per child visited
    hashingQueue -- shared list of `( bitmap, node )` pairs; it is flushed
                    through `processQueue` whenever it holds 500 or more entries

    Nodes without children are skipped. A node is queued only when its `total`
    is at least `TOTAL_THRESHOLD`.

    The same list object is used for the whole traversal, so entries still
    queued when the walk finishes remain in `hashingQueue`; the caller has to
    pass them to `processQueue` once more.
    """
    if len(currNode.children) == 0:
        # If there are no further moves return
        return

    if len( hashingQueue ) >= 500:
        # processQueue empties the list in place. The local name must not be
        # rebound to a new list here: parent frames and the caller hold the
        # original object and would never see entries added to a replacement.
        processQueue( hashingQueue )

    if currNode.total >= TOTAL_THRESHOLD:
        # Only process node if enough games have reached the position
        board = chess.Board()
        for node in nodeList[1:]:
            # nodeList[0] is the root, which carries no real move
            board.push_uci( node.move )
        bitMap = boardToBitMap( board )
        hashingQueue.append( ( bitMap, currNode ) )

    for nextNode in currNode.children.values():
        nodeList.append(nextNode)
        pb.update( 1 )
        openingHelper( nextNode, nodeList, pb, hashingQueue )
        nodeList.pop()

# Shared queue of ( bitmap, node ) pairs waiting to be hashed
hashingQueue = []

# Progress bar sized to the number of trie nodes counted during the build
pb = tqdm(total=totalNodes)
# The traversal is currently disabled.
# NOTE(review): openingHelper only flushes once the queue holds 500 entries,
# so a final processQueue( hashingQueue ) is needed after this call to hash
# whatever is still queued.
# openingHelper( root, [ root ], pb, hashingQueue )

  0%|          | 0/16656191 [00:00<?, ?it/s]

In [15]:
# Debug lookup: the trie node reached after the moves b1c3, e7e5
# (raises KeyError if that line is not in the trie)
root.children[ 'b1c3' ].children[ 'e7e5' ]

<__main__.Trie at 0x11d2fd16cd0>

In [2]:
# Scratch check: convert a freshly created board to a bitmap and hand it to
# bitMapFile with the target 'tmp.json'
# (isRead=False presumably selects write mode - confirm in lib)
board = chess.Board()
bitMap = boardToBitMap( board )
bitMapFile( 'tmp.json', bitMap=bitMap, isRead=False )

In [5]:
# Inspect the en-passant square of `board`.
# NOTE(review): the saved output (44) presumably comes from a `board` left over
# from earlier kernel state rather than from the fresh chess.Board() created
# above - confirm by re-running on a clean kernel.
board.ep_square

44