In [34]:
import chess.pgn
import pandas as pd
from tqdm import tqdm
import io
import time
#import zstandard as zstd

In [35]:
## Decompressing the file downloaded from lichess
# dctx = zstd.ZstdDecompressor()
# with open('lichess_db_standard_rated_2022-12.pgn.zst', 'rb') as ifh, open('pgns\\lichess_db_standard_rated_2022-12.pgn', 'wb') as ofh:
#     dctx.copy_stream(ifh, ofh, write_size=65536)

In [36]:
def get_eval(game, n=None, t=None, threads=8):
    """Analyse every mainline position of `game` with Stockfish.

    Parameters
    ----------
    game : object exposing ``board()`` and ``mainline_moves()``
        (a ``chess.pgn.Game`` in this notebook).
    n : depth limit forwarded as ``chess.engine.Limit(depth=n)``.
    t : time limit forwarded as ``chess.engine.Limit(time=t)``.
    threads : value for the engine's "Threads" option.

    Returns
    -------
    (white, black, centipawns) : tuple of three lists
        ``centipawns`` holds the white-POV score after every move except the
        first one. ``white`` holds the score change after each even-indexed
        move (index 2, 4, ...), ``black`` the negated score change after each
        odd-indexed move, so a positive entry means the evaluation moved in
        favour of the side that just played (assuming white moves first).

    Notes
    -----
    Positions for which the engine reports a mate (``score()`` is None) are
    recorded as 0, including the very first position.
    NOTE(review): mapping mate scores to 0 loses information; consider
    ``score(mate_score=...)`` if downstream consumers can handle it.
    """
    # Note - I would like to make a class which contains all important metrics for my neural net
    board = game.board()
    white = []
    black = []
    centipawns = []
    engine = chess.engine.SimpleEngine.popen_uci("stockfish_15_win_x64_avx2\\stockfish_15_x64_avx2.exe")
    try:
        engine.configure({"Threads": threads}) # or even higher
        # The limit does not change between positions, so build it once.
        limit = chess.engine.Limit(depth=n, time=t)
        prev_centipawn = None

        for move_number, move in enumerate(game.mainline_moves()):
            board.push(move)
            info = engine.analyse(board, limit)
            centipawn = info["score"].white().score()
            if centipawn is None:
                # Mate score: fall back to 0 so later subtractions never see None.
                centipawn = 0
            if move_number > 0:
                delta = centipawn - prev_centipawn
                if move_number % 2 == 0:
                    white.append(delta)
                else:
                    black.append(-delta)
                centipawns.append(centipawn)
            prev_centipawn = centipawn
    finally:
        # Always shut the engine process down, even when analysis fails.
        engine.quit()
    return white, black, centipawns

In [37]:
class Eval:
    """Bundle a game with the engine analysis computed by ``get_eval``.

    Attributes
    ----------
    game : the game object passed in.
    white_eval, black_eval, centipawns : the three values returned by
        ``get_eval(game, n, time_limit, threads)``, in that order.
    """

    def __init__(self, game, n, time_limit, threads):
        self.game = game
        # Analysis runs eagerly at construction time.
        white_part, black_part, scores = get_eval(game, n, time_limit, threads)
        self.white_eval = white_part
        self.black_eval = black_part
        self.centipawns = scores

In [38]:
def to_eval(x, n, t, threads):
    """Return an ``Eval`` built from game ``x`` with the given n, t and threads."""
    return Eval(x, n, t, threads)

In [39]:
# Defining a function to get the eval from a game
def get_game_eval(game, mate_score=None):
    """Collect the evaluations annotated on the mainline of ``game``.

    Walks the mainline starting at ``game.next()`` and stops at the end of
    the line or at the first node whose ``eval()`` is None.

    Parameters
    ----------
    game : node exposing ``next()`` and ``eval()`` (e.g. a parsed PGN game).
    mate_score : forwarded to ``score(mate_score=...)``; with the default
        None, mate evaluations are returned as None.

    Returns
    -------
    list
        White-point-of-view scores, one per evaluated mainline move.
    """
    evals = []
    node = game.next()
    while node is not None:
        pov_score = node.eval()
        if pov_score is None:
            break
        # eval() yields a point-of-view score; convert to white's view before
        # extracting the numeric value.
        evals.append(pov_score.white().score(mate_score=mate_score))
        # Advance along the mainline; without this the loop never terminates.
        node = node.next()
    return evals

In [40]:
# (output column, PGN header tag) pairs; a missing tag is stored as '*'.
# NOTE(review): both "Event" and "EventType" are filled from the "EventType"
# tag, exactly as before - confirm whether "Event" should read the "Event" tag.
_HEADER_COLUMNS = (
    ("White", "White"),
    ("Black", "Black"),
    ("WhiteELO", "WhiteElo"),
    ("BlackELO", "BlackElo"),
    ("Date", "Date"),
    ("Event", "EventType"),
    ("Result", "Result"),
    ("Opening", "ECO"),
    ("Rounds", "Round"),
    ("EventType", "EventType"),
    ("TimeControl", "TimeControl"),
)

# Column order of the written CSV.
_COLUMN_ORDER = (
    "White", "Black", "WhiteELO", "BlackELO", "Date", "Event", "Result",
    "Opening", "Rounds", "Game", "Eval", "EventType", "TimeControl", "Clock",
)


def _mainline_evals(game, mate_score=1000):
    """Return white-POV evals along the mainline, stopping at the first move without one."""
    scores = []
    node = game.next()
    while node is not None:
        pov_score = node.eval()
        if pov_score is None:
            break
        scores.append(pov_score.white().score(mate_score=mate_score))
        node = node.next()
    return scores


def _mainline_clocks(game):
    """Return clock annotations along the mainline, stopping at the first move without one."""
    clocks = []
    node = game.next()
    while node is not None:
        clock_value = node.clock()
        if clock_value is None:
            break
        clocks.append(clock_value)
        node = node.next()
    return clocks


def pgn_to_csv(player, start, end):
    """Export the evaluated games of ``<player>.pgn`` to a CSV file.

    Games are read sequentially with a 0-based counter. A game is exported
    only when its counter is strictly greater than ``start``, the root has at
    most one variation, and at least its first mainline move carries an
    evaluation annotation. Reading stops at the end of the file or once the
    counter reaches ``end``.

    Parameters
    ----------
    player : path of the PGN file without the ``.pgn`` extension; also used
        as the prefix of the output file name.
    start : games with counter <= start are skipped.
    end : exclusive upper bound for the game counter.

    Output
    ------
    Writes ``<player>_<start>_<end>.csv`` with one row per exported game.
    Header tags that are absent from a game are written as '*'. Mate
    evaluations are stored as +/-1000 (``mate_score=1000``). Progress is
    printed every 5000 games.
    """
    columns = {name: [] for name in _COLUMN_ORDER}
    checked = 0
    evaluated = 0

    # The context manager guarantees the PGN handle is closed, also on errors.
    with open(player + '.pgn', encoding="utf-8", errors="ignore") as pgn_file:
        game = chess.pgn.read_game(pgn_file)

        while isinstance(game, chess.pgn.Game) and checked < end:
            if checked % 5000 == 0:
                print(f'Checked: {checked}')
                print(f'Evaluated: {evaluated}')

            # Skip games up to and including `start`, and games that branch at the root.
            if checked > start and len(game.variations) <= 1:
                evals = _mainline_evals(game)
                if evals:
                    headers = game.headers
                    # Every column receives exactly one value per exported game,
                    # so all columns stay the same length.
                    for column, tag in _HEADER_COLUMNS:
                        columns[column].append(headers[tag] if tag in headers else '*')
                    columns["Game"].append(game.mainline_moves())
                    columns["Eval"].append(evals)
                    columns["Clock"].append(_mainline_clocks(game))
                    evaluated += 1

            checked += 1
            game = chess.pgn.read_game(pgn_file)

    # forming the dataframe and saving to a csv
    df1 = pd.DataFrame(columns)
    df1.to_csv(player + '_' + str(start) + '_' + str(end) + '.csv')


In [41]:
# Run the export on 'lichess_db_standard_rated_2023-08.pgn' (pgn_to_csv appends
# the '.pgn' extension) with start=1 and end=1000000; the result is written to
# 'lichess_db_standard_rated_2023-08_1_1000000.csv'.
file_2 = 'lichess_db_standard_rated_2023-08'
pgn_to_csv(file_2, 1, 1000000)

Checked: 0
Evaluated: 0
Checked: 5000
Evaluated: 437
Checked: 10000
Evaluated: 914
Checked: 15000
Evaluated: 1321
Checked: 20000
Evaluated: 1785
Checked: 25000
Evaluated: 2246
Checked: 30000
Evaluated: 2650
Checked: 35000
Evaluated: 3057
Checked: 40000
Evaluated: 3464
Checked: 45000
Evaluated: 3933
Checked: 50000
Evaluated: 4375
Checked: 55000
Evaluated: 4797
Checked: 60000
Evaluated: 5265
Checked: 65000
Evaluated: 5732
Checked: 70000
Evaluated: 6170
Checked: 75000
Evaluated: 6609
Checked: 80000
Evaluated: 7051
Checked: 85000
Evaluated: 7502
Checked: 90000
Evaluated: 7944
Checked: 95000
Evaluated: 8387
Checked: 100000
Evaluated: 8823
Checked: 105000
Evaluated: 9244
Checked: 110000
Evaluated: 9699
Checked: 115000
Evaluated: 10158
Checked: 120000
Evaluated: 10586
Checked: 125000
Evaluated: 11035
Checked: 130000
Evaluated: 11464
Checked: 135000
Evaluated: 11938
Checked: 140000
Evaluated: 12381
Checked: 145000
Evaluated: 12822
Checked: 150000
Evaluated: 13259
Checked: 155000
Evaluated: 137