In [42]:
import sqlite3
import re
import io
import chess.pgn

In [4]:
connection = sqlite3.connect('../../chess_data/lichess.db')
cursor = connection.cursor()

In [10]:
data_path = '../../chess_data/lichess_db_standard_rated_2022-06.pgn'
only_evaluated_path = '../../chess_data/lichess_evaluated_2022-06.pgn'

In [None]:
def pgn_with_evaluation(data_path, only_evaluated_path):
    with open(data_path) as input, open(only_evaluated_path, 'w') as output:
        half = False
        one_png = []
        game_in, game_out = 0, 0
        while line:=input.readline():
            if line == "\n":
                if half:
                    half = False
                    if "eval" in one_png[-1]:
                        output.write(''.join(one_png+['\n']))
                        game_in += 1
                    else:
                        game_out += 1
                    if (game_in + game_out)%100000 == 0:
                        print(f'Games with eval {game_in}/{game_in+game_out}')
                    one_png = []
                else:
                    half = True
                    one_png.append(line)
            else:
                one_png.append(line)

In [37]:
def pgn_with_above_2000(data_path, only_above_2000_path):
    with open(data_path) as input, open(only_above_2000_path, 'w') as output:
        half = False
        one_png = []
        game_in, game_out = 0, 0
        elo_white, elo_black = 0, 0
        while line:=input.readline():
            if line == "\n":
                if half:
                    half = False
                    if elo_white > 2000 and elo_black > 2000:
                        output.write(''.join(one_png+['\n']))
                        game_in += 1
                    else:
                        game_out += 1
                    if (game_in + game_out)%100000 == 0:
                        print(f'Games with eval {game_in}/{game_in+game_out}')
                    one_png = []
                else:
                    half = True
                    one_png.append(line)
            elif "[WhiteElo" in line:
                elo_white = int(re.search('(?<=")\d+', line).group(0))
            elif "[BlackElo" in line:
                elo_black = int(re.search('(?<=")\d+', line).group(0))
            else:
                one_png.append(line)

In [None]:
data_path = '../../chess_data/lichess_evaluated_2022-06.pgn'
only_above_2000_path = '../../chess_data/lichess_evaluated_above_2000_2022-06.pgn'
pgn_with_above_2000(data_path, only_above_2000_path)

In [111]:
def convert_move(move):
    from_position = ord(move[0]) - 97 + 8*(int(move[1])-1)
    to_position = ord(move[2]) - 97 + 8*(int(move[3])-1)

    return from_position * 64 + to_position


def convert_to_nn_input(data_path, output_path):
    with open(data_path) as input, open(output_path, 'w') as output:
        half = False
        one_png = []
        results = []
        evaluation = 0.2
        while line:=input.readline():
            if line == "\n":
                if half:
                    pgn = io.StringIO(one_png[-1])
                    game = chess.pgn.read_game(pgn)
                    board = game.board()
                    for move_node in game.mainline():
                        # print(board.fen(), evaluation, convert_move(str(move_node.move)))
                        results.append(str([board.fen(), evaluation, convert_move(str(move_node.move))]))
                        if '#' in move_node.comment:
                            evaluation = int(re.search('(?<=val #)[-0-9]+', move_node.comment).group(0))
                            evaluation = 20 * (-1 if evaluation < 0 else 20)
                        elif 'eval' in move_node.comment:
                            evaluation = float(re.search('(?<=val )[0-9\.-]+', move_node.comment).group(0))
                        else:
                            print([board.fen(), evaluation, convert_move(str(move_node.move))])
                            break
                        board.push(move_node.move)
                    
                    output.write('\n'.join(results+['']))
                    one_png = []
                    half = False
                else:
                    half = True
                    one_png.append(line)
            else:
                one_png.append(line)

In [112]:
data_path = '../../chess_data/lichess_evaluated_above_2000_2022-06.pgn'
output_path = '../../chess_data/lichess_nn_2022-06.pgn'
convert_to_nn_input(data_path, output_path)

['6k1/6P1/6K1/8/8/8/1p6/1B6 w - - 0 63', 400, 72]
['8/6k1/7p/8/6P1/8/2qp4/5K2 b - - 1 60', -20, 707]
['r1k5/pp2Q3/2pB1n1p/6p1/8/8/P3K1PP/8 w - - 2 25', 400, 3378]
['4Rq1k/5Prp/p7/4Q3/1p1p4/P7/2P4P/5K2 w - - 0 38', 400, 3901]
['5R2/p1rkbQ2/3p4/3B4/1P2P1bp/3Pq3/P5PK/8 w - - 0 32', 400, 3452]
['r4r1k/pppb2pp/2B5/8/2N1p3/1QP5/PP1PK2P/R1B3q1 b - - 1 20', -20, 3294]
['8/8/6q1/5q2/2K5/8/2kb4/8 b - - 5 66', -20, 2974]
['2r3k1/7p/K6P/8/8/1qp5/8/8 b - - 1 53', -20, 3768]
['2kr3r/ppp3p1/2nb1p2/8/8/2P1PNPq/PPQ1PK2/R1B2R2 b - - 1 19', -20, 1494]
['r1b2r1k/ppp1QBbp/2n3p1/3P2N1/8/1P5q/PBP2P1P/2K3R1 w - - 1 17', 400, 3389]
['2k1r3/2n5/q2pN1N1/p7/4P1P1/p1Q5/P1PK4/8 w - - 0 37', 400, 1202]
['1r1R1R2/1b4pk/p3p1pq/1p6/2B1Q2P/8/PP3PP1/6K1 w - - 2 26', 400, 3967]
['8/3n3p/3bk1p1/p4p2/Ppp2P1B/1BP2K1P/1P4P1/8 w - - 0 38', 400, 1114]
['8/ppk1b1p1/2B1Q1P1/4P3/2pP2P1/7q/P1P2r2/1R4K1 b - - 6 26', -20, 1487]
['r3r1k1/1bp1qppN/1p5p/p1p5/5P2/1P1B2Q1/PBPP2PP/5RK1 w - - 3 20', 400, 1462]
['1r6/4Bp2/p2p2k1/K2P1np1/1P5p

OSError: [Errno 28] No space left on device