In [1]:
import chess
import concurrent.futures
import chess.pgn
import chess.engine
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from tqdm import tqdm

In [2]:
def fen_to_vec(fen, w_elo, b_elo, w_time, b_time, time_control): 
    board = chess.Board(fen)
    stockfish_path = r"C:\Users\DELL\Desktop\stockfish\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2"
    engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
    eval = str(engine.analyse(game.board(), chess.engine.Limit(time = 0.1))['score'].white())
    if eval[0] == '#':
        eval = (eval[1] == '+')*5000 - (eval[1] == '-')*5000
    vec = np.array([w_elo, b_elo, int(eval), (w_time - b_time)*100/time_control])
    return vec

In [3]:
def eval_fun(fen):
    with chess.engine.SimpleEngine.popen_uci(r"C:\Users\DELL\Desktop\stockfish\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2") as engine:
        board = chess.Board(fen)
        eval = str(engine.analyse(board, chess.engine.Limit(time = 0.1))['score'].white())
        if eval[0] == '#':
            eval = (eval[1] == '+')*5000 - (eval[1] == '-')*5000
        return int(eval)

Features to extract:
1. White ELO
2. BLack ELO
3. White clock
4. Black clock
5. Percent time difference
6. Position Evaluation
7. % White win lines
8. % Draw lines
9. Increment in seconds
10. Ply number (indicates the phase of the game)

In [5]:
# game_details = []
# # stockfish_path = r"C:\Users\DELL\Desktop\stockfish\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2"
# # engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)
# for i in range(2013, 2014):
#     pgn = open(str(i) + '.pgn')
#     while 1:
#         game = chess.pgn.read_game(pgn)
#         if game is None:
#             break
#         n_plies = int(game.headers['PlyCount'])
#         if game.headers['WhiteClock'] != game.headers['BlackClock'] or n_plies < 40: #no handicap and short games, only classical & rapid games
#             continue
#         w_elo = int(game.headers['WhiteElo'])
#         b_elo = int(game.headers['BlackElo'])
#         result = game.headers['Result']
#         if result == '1-0':
#             result = 1
#         elif result == '0-1':
#             result = -1
#         else:
#             result = 0
#         total_time = int(game.headers['TimeControl'].split('+')[0]) #in seconds 
#         inc = int(game.headers['TimeControl'].split('+')[1]) #increment time in seconds
#         nums = list(range(30, n_plies))
#         random.shuffle(nums)
#         sel_plies = sorted(nums[:4])
#         k = 0
#         n = 1
#         w_time = b_time = 0 #time elapsed for each side
#         while len(game.variations):
#             if n == sel_plies[k]:
#                 b = game.board()
#                 # eval_init = str(engine.analyse(b, chess.engine.Limit(time = 0.1))['score'].white())
#                 # eval_final = str(engine.analyse(game.variations[0].board(), chess.engine.Limit(time = 0.1))['score'].white())
#                 # if eval_init[0] == '#':
#                 #     eval_init = (eval_init[1] == '+')*5000 - (eval_init[1] == '-')*5000
#                 # if eval_final[0] == '#':
#                 #     eval_final = (eval_final[1] == '+')*5000 - (eval_final[1] == '-')*5000
#                 # cp_loss = abs(int(eval_final) - int(eval_init))
#                 # % winning lines
#                 # w_win = b_win = 0
#                 # n_moves = len(list(b.legal_moves))
#                 # for move in b.legal_moves:
#                 #     b.push(move)
#                 #     move_eval = str(engine.analyse(b, chess.engine.Limit(time = 0.1))['score'].white())
#                 #     if move_eval[0] == '#':
#                 #         if move_eval[1] == '+':
#                 #             w_win += 1
#                 #         else:
#                 #             b_win += 1
#                 #     elif int(move_eval) >= 30:
#                 #         w_win += 1
#                 #     elif int(move_eval) <= -30:
#                 #         b_win += 1
#                 #     b.pop()
#                 # per_w = w_win*100/n_moves #% moves leading to white adv
#                 # per_b = b_win*100/n_moves #% moves leading to black adv
#                 # per_draw = 100 - per_w - per_b
#                 details = [b.fen(), n, w_elo, b_elo, total_time - w_time, total_time - b_time, inc, str(game.variations[0].move), result]
#                 details_str = [str(detail) for detail in details]
#                 with open('game_details_win_percent2.txt', 'a') as f:
#                     f.write('$'.join(details_str) + '\n')
#                 k += 1
#                 if k == len(sel_plies):
#                     break
#             if n % 2:
#                 w_time += float(game.variations[0].comment.split(']')[0].split(' ')[-1])
#             else:
#                 b_time += float(game.variations[0].comment.split(']')[0].split(' ')[-1])
#             game = game.variations[0]
#             n += 1

READING THE DATA FROM THE FILE

In [2]:
def num_moves(fen):
    board = chess.Board(fen)
    return len(list(board.legal_moves))

In [3]:
X = []
with open('game_details_win_percent2.txt', 'r') as f:
    for line in f:
        X.append(line.strip().split('$'))
X = np.array(X)
X = np.concatenate(((X[:,1].astype(int)%2).reshape(-1, 1), X), axis = 1)

In [4]:
fens = X[:, 1]
with concurrent.futures.ThreadPoolExecutor() as executor:
    n_moves = np.array(list(executor.map(num_moves, fens)))
X = np.concatenate((n_moves.reshape(-1, 1), X), axis = 1)

In [5]:
X

array([['20', '0',
        'rn3r1k/pp3ppp/4p3/3p1N2/8/2NP2PP/PPP3P1/2KR3R b - - 0 17', ...,
        '10', 'e6f5', '1'],
       ['37', '0',
        '3r3k/1p3ppp/p1n1r3/3N1p2/1PP5/P2P2PP/3K2P1/4R2R b - - 0 23',
        ..., '10', 'c6d4', '1'],
       ['12', '1', '4k3/1p6/p5pp/3p1p2/1PP4P/P1K3P1/6P1/8 w - - 0 34',
        ..., '10', 'c4d5', '1'],
       ...,
       ['27', '1', '1b6/5pk1/B1R3pp/1p6/4r2P/P2r2P1/1P4RK/8 w - - 7 35',
        ..., '0', 'a6b5', '-1'],
       ['1', '1', '1b6/5pk1/2R3pp/1B6/7r/P2r2P1/1P4RK/8 w - - 0 36', ...,
        '0', 'h2g1', '-1'],
       ['6', '1', '8/2b2pk1/6p1/PR6/2r3P1/5B2/2K4p/8 w - - 3 52', ...,
        '0', 'c2b2', '-1']], dtype='<U76')

In [7]:
def aug(fen):
    #b = chess.Board(fen)
    #with chess.engine.SimpleEngine.popen_uci(r"C:\Users\DELL\Desktop\stockfish\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2") as engine:
    #eval = str(engine.analyse(board, chess.engine.Limit(time = 0.1))['score'].white())
    eval = eval_fun(fen)
    if eval[0] == '#':
        eval = (eval[1] == '+')*5000 - (eval[1] == '-')*5000
    b = chess.Board(fen)
    w_win = b_win = 0
    n_moves = len(list(b.legal_moves))
    # next_fens = []
    for move in b.legal_moves:
        b.push(move)
        #next_fens.append(b.fen())
        move_eval = eval_fun(b.fen())
        if move_eval[0] == '#':
            if move_eval[1] == '+':
                w_win += 1
            else:
                b_win += 1
        elif int(move_eval) >= 30:
            w_win += 1
        elif int(move_eval) <= -30:
            b_win += 1
        b.pop()
    # with concurrent.futures.ThreadPoolExecutor() as executor_next:
    #     next_evals = list(executor_next.map(eval_fun, next_fens))
    # for eval in next_evals:
    #     if eval[0] == '#':
    #         if eval[1] == '+':
    #             w_win += 1
    #         else:
    #             b_win += 1
    #     elif int(eval) >= 30:
    #         w_win += 1
    #     elif int(eval) <= -30:
    #         b_win += 1
    per_w = w_win*100/n_moves #% moves leading to white adv
    per_b = b_win*100/n_moves #% moves leading to black adv
    per_draw = 100 - per_w - per_b
    return [int(eval), per_w, per_draw]

In [8]:
fens = X[:, 0]
eval_features = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    #eval_features = np.array(list(executor.map(aug, fens)))
    progress = tqdm(total = len(fens))
    futures = [executor.submit(eval_fun, fen) for fen in fens]
    for future in concurrent.futures.as_completed(futures):
        # Update progress bar
        result = future.result()
        with open('eval_features_win_percent.txt', 'a') as f:
            #f.write('$'.join([str(item) for item in result]) + '\n')
            f.write(str(result) + '\n')
        eval_features.append(result)
        progress.update(1)
    progress.close()
eval_features = np.array(eval_features)

100%|████████████████████████████████████████████████████████████████████████| 325348/325348 [8:03:31<00:00, 11.21it/s]


In [6]:
Y = X[:, -1].astype(int)
ohe = OneHotEncoder(sparse_output = False)
Y_ohe = ohe.fit_transform(Y.reshape(-1, 1))
print(ohe.categories_)

[array([-1,  0,  1])]


In [7]:
X[0]

array(['20', '0',
       'rn3r1k/pp3ppp/4p3/3p1N2/8/2NP2PP/PPP3P1/2KR3R b - - 0 17', '34',
       '2059', '1941', '663.512', '399.17199999999997', '10', 'e6f5', '1'],
      dtype='<U76')

In [8]:
eval_features = []
with open('eval_features_win_percent.txt', 'r') as f:
    for line in f:
        eval_features.append(int(line.strip()))
eval_features = np.array(eval_features)

In [10]:
X = np.concatenate((np.array([[float(Xij) for Xij in Xi[[0, 1, 3, 4, 5, 6, 7, 8]]] for Xi in X]), eval_features.reshape(-1,1)), axis = 1)

In [11]:
x_train, x_test_val, y_train, y_test_val = train_test_split(X, Y_ohe, test_size = 0.3, shuffle = True, random_state = 0)
x_val, x_test, y_val, y_test = train_test_split(x_test_val, y_test_val, test_size = 0.6)

BUILDING THE MODEL WIN DRAW LOSS MODEL

In [33]:
ni = len(X[0])
no = len(Y_ohe[0])
alpha = 5
ns = len(x_train)
nh = int(ns/(ni + no)/alpha)
# best 192 2 layers, accuracy test = 73.22%
model = Sequential([
    Dense(192, input_shape = (ni,), activation = 'relu'),
    #Dropout(0.1),
    Dense(192, activation = 'relu'),
    #Dropout(0.1),
    #Dense(192, activation = 'relu'),
    Dense(no, activation = 'softmax')
])

In [34]:
model.compile(
    optimizer = 'adam',
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

In [35]:
early_stopping_callback = EarlyStopping(monitor = 'val_loss', patience = 5)
model.fit(x_train, y_train, epochs = 50, validation_data = (x_val, y_val), batch_size = 32, callbacks = [early_stopping_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50


<keras.src.callbacks.History at 0x2a1ba325390>

MODEL EVALUATION

In [39]:
y_pred = model.predict(x_test)
y_pred = np.argmax(y_pred, axis = 1) - 1
y_true = ohe.inverse_transform(y_test).reshape(-1)
print(accuracy_score(y_true, y_pred)*100)
confusion_matrix(y_true, y_pred)

73.37055820227789


array([[17265,  2903,  3437],
       [ 1546,  5231,  2610],
       [ 2579,  2520, 20472]], dtype=int64)

In [38]:
model.save('best_win_loss_model.keras')