### 1. Import and data loading

In [1]:
import pandas as pd
import chess.engine
import numpy as np
from tqdm import tqdm

In [2]:
df = pd.read_csv("data/chess_games_feature.csv")
clean_df = pd.read_csv("data/chess_games_clean.csv")
df.head()

Unnamed: 0,WhiteElo,BlackElo,ResultNumeric,EventTypeID,RatingDiff,AvgRating,WhiteIsHigherRated,WhiteRatingAdvantage,ECOID,w_material_early,...,b_piece_activity,w_doubled_pawns,b_doubled_pawns,doubled_pawns_diff,w_isolated_pawns,b_isolated_pawns,isolated_pawns_diff,both_castled_early,total_captures_early,activity_diff
0,2487,2596,1,0,109,2541.5,0,-109,0,20.0,...,36.0,0.0,0.0,0.0,1.0,1.0,0.0,0,15.0,0.0
1,2542,2433,2,0,109,2487.5,1,109,9,16.0,...,20.0,0.0,0.0,0.0,1.0,2.0,-1.0,0,15.0,11.0
2,2404,2534,0,0,130,2469.0,0,-130,0,21.0,...,34.0,0.0,0.0,0.0,1.0,1.0,0.0,1,9.0,-1.0
3,2860,2922,1,0,62,2891.0,0,-62,0,33.0,...,39.0,0.0,0.0,0.0,0.0,0.0,0.0,0,4.0,0.0
4,2752,2395,2,0,357,2573.5,1,357,0,18.0,...,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0,14.0,10.0


In [3]:
df = df.sample(n=100_000, random_state=42).reset_index(drop=True)
clean_df = clean_df.loc[df.index].reset_index(drop=True)

df['BoardStateAtMoveN'] = clean_df['BoardStateAtMoveN']

### 2. Setup Stockfish

In [4]:
engine = chess.engine.SimpleEngine.popen_uci(["stockfish"])

In [5]:
def eval_fen(fen, movetime=0.02):
    if pd.isna(fen):
        return None
    board = chess.Board(fen)
    info = engine.analyse(board, chess.engine.Limit(time=movetime))
    score = info["score"].pov(chess.WHITE)
    if score.is_mate():
        sign = 1 if score.mate() > 0 else -1
        cp = 10000 * sign
    else:
        cp = score.cp
    return cp

evals = []
for fen in tqdm(df['BoardStateAtMoveN'], total=len(df)):
    evals.append(eval_fen(fen, movetime=0.05))

100%|██████████| 100000/100000 [1:25:34<00:00, 19.48it/s]


In [6]:
df['sf_eval_cp'] = evals
engine.quit()

### 3. Map to win probability

In [7]:
def cp_to_winprob(cp):
    if cp is None:
        return None
    return 1 / (1 + np.exp(-cp / 400))

df['sf_eval_winprob'] = df['sf_eval_cp'].apply(cp_to_winprob)
df.head()

Unnamed: 0,WhiteElo,BlackElo,ResultNumeric,EventTypeID,RatingDiff,AvgRating,WhiteIsHigherRated,WhiteRatingAdvantage,ECOID,w_material_early,...,doubled_pawns_diff,w_isolated_pawns,b_isolated_pawns,isolated_pawns_diff,both_castled_early,total_captures_early,activity_diff,BoardStateAtMoveN,sf_eval_cp,sf_eval_winprob
0,3033,2825,2,0,208,2929.0,1,208,0,26.0,...,0.0,0.0,2.0,-2.0,1,11.0,7.0,8/5pkp/b1p3p1/2P5/5Q2/1Nn1P1P1/5P1P/3q1BK1 w -...,114,0.570772
1,2478,2567,2,0,89,2522.5,0,-89,0,33.0,...,0.0,0.0,0.0,0.0,0,4.0,-1.0,3k3r/6Rp/2nB4/p3pPP1/1pK1P3/8/n1P3BP/8 w - - 0 31,524,0.787513
2,2572,2519,1,0,53,2545.5,1,53,0,19.0,...,-1.0,2.0,3.0,-1.0,0,15.0,-6.0,6k1/2q2p1p/bn2p1pP/3pP3/pR1P1P2/P1p1NNP1/2P3B1...,127,0.578715
3,2471,2605,0,0,134,2538.0,0,-134,6,20.0,...,0.0,3.0,1.0,2.0,0,15.0,2.0,3q4/r2n1pbk/1p2b1pp/2pNp3/p1PnP3/2RBBN1P/PP3PP...,21,0.513122
4,2502,2470,2,0,32,2486.0,1,32,0,24.0,...,0.0,1.0,2.0,-1.0,0,14.0,-5.0,r3r1k1/1N1R1pp1/2R4p/pn6/1p2PP2/1P3KP1/P7/8 w ...,125,0.577495


In [8]:
df = df.drop(columns=['BoardStateAtMoveN'])

In [9]:
df.to_csv("data/chess_games_feature_60_sf.csv", index=False)