In [1]:
import os
import chess.pgn
from tqdm.notebook import tqdm
import json
import chess
import subprocess
from lib import bitMapFile, boardToBitMap
import requests
from urllib.parse import urlsplit

In [2]:
# Data locations. The defaults are the original author's local checkout; set the
# MYSTIC_DATA_DIR environment variable to run the notebook against another copy
# of the data without editing this cell.
data_dir = os.environ.get(
    "MYSTIC_DATA_DIR",
    "/home/mystic/Programming/mystic-bot/notebooks/data",
)
# Joining with a trailing "" keeps the trailing separator the original path had.
output_dir = os.path.join(data_dir, "pgns", "")
os.makedirs(output_dir, exist_ok=True)
links_path = os.path.join(data_dir, "files.html")
# url = "https://www.pgnmentor.com/files.html"

In [3]:
class Trie:
    """Node of the opening trie: one move plus tallies of the games that played it.

    Attributes:
        move: identifier of the move leading to this node ('root' for the root).
        count: generic counter, initialised to 0 (not read by this class).
        white: tally incremented by the builder for games whose result is 1.
        black: tally incremented by the builder for games whose result is -1.
        total: number of games that passed through this node.
        children: mapping of move identifier -> child Trie node.
    """

    def __init__( self, move ):
        self.move = move
        self.count = 0
        # The tallies must exist from construction: the summary helpers and
        # __str__ read them, and the builder updates them with `+=`.
        self.white = 0
        self.black = 0
        self.total = 0
        self.children = {}

    def childrenSummary( self ):
        """Return {move: {"white", "black", "total"}} for every direct child."""
        return {
            move: {
                "white": child.white,
                "black": child.black,
                "total": child.total,
            }
            for move, child in self.children.items()
        }

    def combinedChildrenSummary( self, existingSummary ):
        """Merge this node's children summary into `existingSummary`.

        Tallies of moves present in both are added together; moves only present
        here are inserted. `existingSummary` is modified in place and returned.
        """
        for move, tallies in self.childrenSummary().items():
            merged = existingSummary.get( move )
            if merged is None:
                existingSummary[ move ] = tallies
                continue
            for key in ( "white", "black", "total" ):
                merged[ key ] += tallies[ key ]
        return existingSummary

    def __str__( self ):
        return f"MOVE: { self.move }:\nChildren: { list( self.children ) }\nWhite: {self.white}\nBlack: {self.black}\nTotal: {self.total}"

In [6]:
# Opening trie kept as nested plain dicts so it can be dumped straight to JSON.
# Each node records its move (UCI), how many games played it, and its children.
root = {'move': 'root', 'count': 0, 'children': {}}

halfLimit = 16
movesSeen = 0
dbSize = 0
for file_name in tqdm(os.listdir(output_dir)):
    if not file_name.endswith(".pgn"):
        continue
    file_path = os.path.join(output_dir, file_name)
    with open(file_path, "r", encoding="utf-8", errors="ignore") as pgn_file:
        # read_game returns None once the file is exhausted, which ends the loop.
        for game in iter(lambda: chess.pgn.read_game(pgn_file), None):
            currNode = root
            for i, move in enumerate(game.mainline_moves()):
                # Plies 0..halfLimit inclusive are recorded (halfLimit + 1 half-moves).
                if i > halfLimit:
                    break
                uci = move.uci()
                children = currNode['children']
                if uci not in children:
                    children[uci] = {'move': uci, 'count': 0, 'children': {}}
                    dbSize += 1
                child = children[uci]
                child['count'] += 1
                movesSeen += 1
                currNode = child

print("Number of moves:", movesSeen)
print("DB size:", dbSize)

  0%|          | 0/249 [00:00<?, ?it/s]

illegal san: 'Qxe1' in r2k3r/2pPp3/p4n2/3b2B1/1p5P/2qP4/3RQ1P1/4K2R w - - 2 31 while parsing <Game at 0x76fa22f0d220 ('Gelfand,B' vs. 'Gareev,T', '2019.12.29' at 'Moscow RUS')>
illegal san: 'Bf3' in 6k1/pb3r1p/1q4p1/6Q1/2p1rb2/3p3P/PP1R2P1/2BR1N1K w - - 8 38 while parsing <Game at 0x76fa2c2003e0 ('Schleining,Z' vs. 'Paehtz,E', '2014.11.23' at 'Dresden GER')>


Number of moves: 8589441
DB size: 1479274


In [7]:
# Persist the nested-dict trie (`root`) to openings.json in the working directory.
# NOTE(review): `root` is rebound to a Trie object in a later cell; this cell
# expects the dict-based trie, so it must run before that rebinding.
with open("openings.json", "w") as f:
    json.dump(root, f, indent=4)

In [5]:
# Load openings.json from the working directory into `openings`.
# The empty-list assignment is only a placeholder; json.load replaces it.
openings = []

with open("openings.json", "r") as f:
    openings = json.load(f)

In [7]:
# Build the Trie of opening moves from the loaded games.
# NOTE(review): this expects `openings` to map game ids to
# {'moves': [...], 'result': ...}. That is not the nested-dict trie written to
# openings.json by the earlier cell. Confirm which file version is intended.
MAX_OPENING_PLIES = 31 # Openings generally don't run much past 30 half moves

root = Trie( 'root' )
# An infinite total lets the root pass any `total >= threshold` filter.
root.total = float( 'inf' )
print(root)

# Counters shown by a later cell; initialised here so a fresh kernel can run this cell.
totalMoves = 0
totalNodes = 0

for game in tqdm( openings.values() ):
    moves, result = game[ 'moves' ], game[ 'result' ]
    currNode = root # every game is inserted starting from the root
    for j, move in enumerate( moves ):
        if j >= MAX_OPENING_PLIES: break
        target = currNode.children.get( move )
        if target is None:
            target = Trie( move )
            currNode.children[ move ] = target
            totalNodes += 1
        # result == 1 feeds the `white` tally and result == -1 the `black` tally.
        # Both are counts, so both are incremented.
        if result == 1: target.white += 1
        elif result == -1: target.black += 1
        target.total += 1
        currNode = target
        totalMoves += 1

MOVE: root:
Children: []
White: 0
Black: 0
Total: inf


  0%|          | 0/1087171 [00:00<?, ?it/s]

In [8]:
# Display the counters accumulated while building the Trie: (moves inserted, nodes created).
totalMoves, totalNodes

(33142925, 16656191)

In [9]:
# Position hash -> children summary ({move: {"white", "black", "total"}}); filled by processQueue.
openingDB = {}
# Minimum `total` a node needs before openingHelper queues its position for hashing.
TOTAL_THRESHOLD = 2

def processQueue( queue ):
    """Hash every queued position with the external helper and merge it into openingDB.

    Args:
        queue: list of ( bitmap, node ) pairs. Each bitmap is written to
            ./tmp/<index>.json and each node supplies the children summary
            stored under the resulting hash. The list is cleared in place on
            success, so callers can keep appending to the same object.

    Raises:
        subprocess.CalledProcessError: if the hashing helper exits non-zero.
        AssertionError: if the helper does not return one hash per queued item.

    Assumes the helper prints one line per file, in file-index order, with the
    hash as the last whitespace-separated token. TODO confirm against the binary.
    """
    if not queue:
        # Nothing to hash; skip spawning the helper.
        return

    os.makedirs( './tmp', exist_ok=True )
    paths = [ f'./tmp/{i}.json' for i in range( len( queue ) ) ]

    try:
        # Save files
        for path, ( bitmap, _ ) in zip( paths, queue ):
            bitMapFile( path, bitMap=bitmap, isRead=False )

        # Get hashes
        output = subprocess.run(
            ["../target/release/mystic-bot", "./tmp"],
            capture_output=True,
            check=True,
        )
        # Blank lines are skipped rather than assuming exactly one trailing newline.
        hashes = [
            int( line.split()[ -1 ] )
            for line in output.stdout.decode( 'utf-8' ).splitlines()
            if line.strip()
        ]
    finally:
        # Remove stale files even on failure so the next batch starts clean.
        for path in paths:
            if os.path.exists( path ):
                os.remove( path )

    assert len( hashes ) == len( queue ), \
        f"expected { len( queue ) } hashes, got { len( hashes ) }"

    # Update DB
    for positionHash, ( _, node ) in zip( hashes, queue ):
        if positionHash in openingDB:
            # The same position was reached by another move order: merge tallies.
            openingDB[ positionHash ] = node.combinedChildrenSummary( openingDB[ positionHash ] )
        else:
            openingDB[ positionHash ] = node.childrenSummary()
            if len( openingDB ) % 10000 == 0:
                print( f"OpeningDB Size: { len( openingDB ) }" )

    queue.clear()

def openingHelper( currNode, nodeList, pb, hashingQueue ):
    """Depth-first walk of the trie that queues qualifying positions for hashing.

    Args:
        currNode: node being visited.
        nodeList: path of nodes from the root to `currNode`, inclusive. The
            first entry (the root) carries no move and is skipped when the
            board is rebuilt.
        pb: progress reporter with an `update( n )` method (e.g. a tqdm bar).
        hashingQueue: shared list of ( bitmap, node ) pairs. It is flushed
            through processQueue once it holds 500 entries. Entries still in
            it when the walk finishes are left for the caller to flush.
    """
    if len( currNode.children ) == 0:
        # Leaf: no follow-up moves, so nothing to store for this position.
        return

    if len( hashingQueue ) >= 500:
        # processQueue clears the list in place. Rebinding the name to a new
        # list here would hide later entries from the caller and lose them.
        processQueue( hashingQueue )

    if currNode.total >= TOTAL_THRESHOLD:
        # Only process node if enough games have reached the position
        board = chess.Board()
        for node in nodeList[1:]:
            board.push_uci( node.move )
        bitMap = boardToBitMap( board )
        hashingQueue.append( ( bitMap, currNode ) )

    for nextNode in currNode.children.values():
        nodeList.append( nextNode )
        pb.update( 1 )
        openingHelper( nextNode, nodeList, pb, hashingQueue )
        nodeList.pop()

# Shared queue of ( bitmap, node ) pairs that openingHelper fills and processQueue drains.
hashingQueue = []

# Progress bar sized by the number of nodes created while building the Trie.
pb = tqdm(total=totalNodes)
# Traversal is currently disabled. When re-enabling it, call
# processQueue( hashingQueue ) afterwards to flush the final partial batch.
# openingHelper( root, [ root ], pb, hashingQueue )

  0%|          | 0/16656191 [00:00<?, ?it/s]

In [15]:
# Debug lookup: the node reached after b1c3 then e7e5.
# Raises KeyError if either move is missing from the trie.
root.children[ 'b1c3' ].children[ 'e7e5' ]

<__main__.Trie at 0x11d2fd16cd0>

In [2]:
# Scratch check: write the bitmap of a freshly constructed board to tmp.json.
board = chess.Board()
bitMap = boardToBitMap( board )
bitMapFile( 'tmp.json', bitMap=bitMap, isRead=False )

In [5]:
# Inspect the en-passant square of whichever `board` is live in the kernel.
# NOTE(review): the captured value (44) presumably came from a different board
# than the fresh one built in the scratch cell. Re-run in order to confirm.
board.ep_square

44