In [39]:
import chess
import chess.engine
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [40]:
stockfish_path = "../engine/stockfish-ubuntu-x86-64-avx2"
engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

In [41]:
with open("./scrap/saved_files/game_commentary.txt", "r") as f:
    lines = f.readlines()

print("Number of lines: ", len(lines))

Number of lines:  11601


In [42]:
def determine_phase(board):
    moves = len(board.move_stack)
    num_pieces = len(board.piece_map())
    if moves <= 10:
        return "Opening"
    elif num_pieces <= 12:
        return "Endgame"
    else:
        return "Middlegame"

In [None]:
games = 0
data = []

for line in lines:
    if len(data) > 10000:
        break
    if not line.strip():
        continue
    if line.startswith("1.") and not line.startswith("1..."): 
        games += 1
        board = chess.Board()
        history = []

    game_segments = line.strip().split("<move>")
    
    for gc in tqdm(game_segments):
        moves_commentary = gc.split("<sep>")
        if len(moves_commentary) == 1:
            continue

        pgn_moves, commentary = moves_commentary[0].strip(), moves_commentary[1].strip()

        move_list = pgn_moves.split(" ")
        eval_before, eval_after, delta_eval, top_k_best_moves = None, None, None, []

        for idx, move in enumerate(move_list):
            if move[0] in "0123456789": 
                continue
            try:
                chess_move = board.push_san(move)
                history.append(move)

                if idx == len(move_list) - 1: 
                    if len(board.move_stack) > 1:
                        board.pop()  
                        eval_before = engine.analyse(board, chess.engine.Limit(time=0.5))["score"].relative
                        eval_before = eval_before.score() if not eval_before.is_mate() else "Mate"
                        board.push(chess_move)

                    eval_after = engine.analyse(board, chess.engine.Limit(time=0.5))["score"].relative
                    eval_after = eval_after.score() if not eval_after.is_mate() else "Mate"

                    if isinstance(eval_before, int) and isinstance(eval_after, int):
                        delta_eval = eval_after - eval_before

                    if not eval_after == "Mate":                    
                        best_moves = engine.analyse(board, chess.engine.Limit(time=0.5))["pv"][:3]
                        top_k_best_moves = [move.uci() for move in best_moves]
                    else:
                        top_k_best_moves = []

                    data.append({
                        "Move": move,
                        "History (PGN)": " ".join(history[:-1]),
                        "Commentary": commentary,
                        "Eval Before": eval_before,
                        "Eval After": eval_after,
                        "Delta": delta_eval,
                        "Top K Best Moves": ", ".join(top_k_best_moves)
                    })
            except ValueError:
                #print(f"Invalid move: {move}")
                break

print("Number of games: ", games)

engine.quit()

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 15/15 [00:21<00:00,  1.42s/it]
100%|██████████| 12/12 [00:14<00:00,  1.23s/it]
100%|██████████| 21/21 [00:29<00:00,  1.42s/it]
100%|██████████| 7/7 [00:09<00:00,  1.30s/it]
100%|██████████| 5/5 [01:14<00:00, 14.99s/it]

Number of games:  4





In [48]:
df = pd.DataFrame(data)
df.to_csv("./game_cmt.csv", index=False)

In [51]:
len(df)

51

In [52]:
df.head()

Unnamed: 0,Move,History (PGN),Commentary,Eval Before,Eval After,Delta,Top K Best Moves
0,g6,c4 c5 Nf3 Nf6 d4 cxd4 Nxd4,"Many 4th moves for Black have been tried here,...",-13,34,47.0,"b1c3, d7d6, e2e4"
1,d5,c4 c5 Nf3 Nf6 d4 cxd4 Nxd4 g6 Nc3,"White was threatening 6 P - K 4, transposing t...",-41,30,71.0,"c1g5, f8g7"
2,Bg5,c4 c5 Nf3 Nf6 d4 cxd4 Nxd4 g6 Nc3 d5,"A better choice than 6 P x P, which leads to a...",24,-16,-40.0,"f8g7, g5f6"
3,Qa5,c4 c5 Nf3 Nf6 d4 cxd4 Nxd4 g6 Nc3 d5 Bg5 dxc4 e3,Premature. Better was 7... B - Kt 2.,-22,47,69.0,"g5f6, e7f6, f1c4"
4,Bb4,c4 c5 Nf3 Nf6 d4 cxd4 Nxd4 g6 Nc3 d5 Bg5 dxc4 ...,That he has played for the fianchetto and then...,-67,81,148.0,"a1c1, b4c3, c1c3"
