In [3]:
import chess
import pandas as pd
import numpy as np
import time
import positional_features
from IPython.display import clear_output
from stockfish import Stockfish

# Dataset locations, relative to the notebook's working directory.
DATASET_OLD_PATH = '../data_old/dataset_kingdanger.csv'
DATASET_PATH = '../data/dataset.csv'

TEST_DATASET_PATH = '../test/test_dataset.csv'
NEW_TEST_DATASET_PATH = '../test/new_test_dataset.csv'

# Raw string: the Windows path is full of backslashes, and sequences such as
# `\P` and `\s` are invalid escapes in a normal string literal (Python warns
# about them and intends to reject them in a future release).
STOCKFISH_PATH = r'C:\Program Files\stockfish\stockfish-windows-x86-64-avx2.exe'

# The first CSV column becomes the frame's index.
data = pd.read_csv(DATASET_OLD_PATH, index_col=0)

# One shared engine instance used by every evaluation helper in this notebook.
stockfish = Stockfish(path=STOCKFISH_PATH, depth=13, parameters={'Threads': 4, 'Hash': 32})

In [4]:
def get_sf_eval(fen, engine=None):
    """Return the engine's evaluation of a position, expressed in pawns.

    Args:
        fen: FEN string of the position to evaluate.
        engine: Optional engine object exposing `set_fen_position` and
            `get_evaluation`. Defaults to the notebook-level `stockfish`.

    Returns:
        For a `'cp'` evaluation, the reported value divided by 100. For any
        other evaluation type (a forced mate), `float('inf')` for a positive
        mate score and `float('-inf')` for a negative one. A mate score of 0
        is resolved from the FEN's side-to-move field (see below).
    """
    if engine is None:
        engine = stockfish

    engine.set_fen_position(fen)
    # Named `evaluation` rather than `eval` so the builtin is not shadowed.
    evaluation = engine.get_evaluation()

    if evaluation['type'] == 'cp':
        return evaluation['value'] / 100.0

    mate_in = evaluation['value']
    if mate_in == 0:
        # "Mate in 0": the side to move is already checkmated, so the sign of
        # the score carries no information. Decide the winner from the FEN.
        # NOTE(review): assumes scores are reported from White's point of view,
        # as python-stockfish's get_evaluation does -- confirm for the
        # installed version.
        fields = fen.split()
        # Fall back to the previous result (-inf) if the FEN has no
        # side-to-move field.
        white_to_move = len(fields) < 2 or fields[1] == 'w'
        return float('-inf') if white_to_move else float('inf')

    return float('inf') if mate_in > 0 else float('-inf')
    
def get_sf_eval_before_and_after(fen, move):
    """Evaluate a position both before and after a move is played.

    Args:
        fen: FEN string of the position in which `move` is played.
        move: The move to play, in UCI notation (applied with `push_uci`).

    Returns:
        A 3-tuple `(before, after, delta)` where `delta` is `after - before`
        rounded to two decimals. When both evaluations are the same infinity
        (both forced mates with the same sign) the difference is NaN.
    """
    # Evaluate the starting position first, before the board is built.
    before = get_sf_eval(fen)

    # Play the move on a scratch board to obtain the resulting position.
    position = chess.Board(fen)
    position.push_uci(move)
    after = get_sf_eval(position.fen())

    change = round(after - before, 2)
    return (before, after, change)

def is_top_move(fen, move, top_n=3, engine=None):
    """Check whether a move is among the engine's best moves for a position.

    Args:
        fen: FEN string of the position to analyse.
        move: Candidate move; it is compared for equality against the
            `'Move'` entry of each result returned by the engine.
        top_n: How many of the engine's best moves to consider. Defaults to 3.
        engine: Optional engine object exposing `set_fen_position` and
            `get_top_moves`. Defaults to the notebook-level `stockfish`.

    Returns:
        True if `move` matches one of the returned top moves, else False.
    """
    if engine is None:
        engine = stockfish

    engine.set_fen_position(fen)
    best_moves = [candidate['Move'] for candidate in engine.get_top_moves(top_n)]
    return move in best_moves

In [None]:
# Engine-based features: evaluation before/after each move, whether the move
# is one of the engine's top choices, plus game phase and king danger.
size = len(data)
evals = []
top_moves = []
game_phases = []
king_danger_scores = []

# Track progress by row position rather than by index label: the index is
# whatever the CSV's first column held, so it need not run 0..size-1.
for row_number, (_, row) in enumerate(data.iterrows()):
    if row_number % 100 == 0:
        clear_output()
        print(f'{row_number}/{size}')

    fen = row['fen']
    move = row['move']
    board = chess.Board(fen)

    # Engine calls: these dominate the runtime of this cell.
    evals.append(get_sf_eval_before_and_after(fen, move))
    top_moves.append(is_top_move(fen, move))

    game_phases.append(positional_features.game_phase(board))
    # Scored for the side to move (`board.turn`).
    king_danger_scores.append(positional_features.king_danger_score(board, board.turn))

# Transpose the per-row triples into three columns each. `zip(*[])` yields
# nothing to unpack, so an empty frame is handled explicitly.
evals_before, evals_after, evals_delta = zip(*evals) if evals else ([], [], [])
user_king_danger_scores, opponent_king_danger_scores, king_danger_diffs = (
    zip(*king_danger_scores) if king_danger_scores else ([], [], [])
)

data['eval_before'] = evals_before
data['eval_after'] = evals_after
data['eval_delta'] = evals_delta
data['is_top_move'] = top_moves
data['game_phase'] = game_phases
data['user_king_danger'] = user_king_danger_scores
data['opponent_king_danger'] = opponent_king_danger_scores
data['king_danger_diff'] = king_danger_diffs

data.to_csv(DATASET_PATH)

# 105293 positions evaluated in 376 minutes ~= 7 hours

In [6]:
# Engine-free positional features: center control, file control and pawn
# structure, computed for every position in `data`.
size = len(data)
center_control_scores = []
file_control_scores = []
pawn_structures = []

# Track progress by row position rather than by index label: the index is
# whatever the CSV's first column held, so it need not run 0..size-1.
for row_number, (_, row) in enumerate(data.iterrows()):
    if row_number % 1000 == 0:
        clear_output()
        print(f'{row_number}/{size}')

    board = chess.Board(row['fen'])

    # Scored for the side to move (`board.turn`).
    center_control_scores.append(positional_features.center_control_score(board, board.turn))
    file_control_scores.append(positional_features.file_control_score(board, board.turn))
    pawn_structures.append(positional_features.pawn_structure(board))

# Transpose the per-row triples into three columns each. `zip(*[])` yields
# nothing to unpack, so an empty frame is handled explicitly.
user_center_control_scores, opponent_center_control_scores, center_control_diffs = (
    zip(*center_control_scores) if center_control_scores else ([], [], [])
)
user_file_control_scores, opponent_file_control_scores, file_control_diffs = (
    zip(*file_control_scores) if file_control_scores else ([], [], [])
)

data['user_center_control_score'] = user_center_control_scores
data['opponent_center_control_score'] = opponent_center_control_scores
data['center_control_diff'] = center_control_diffs

data['user_file_control_score'] = user_file_control_scores
data['opponent_file_control_score'] = opponent_file_control_scores
data['file_control_diff'] = file_control_diffs

data['pawn_structure'] = pawn_structures

data.to_csv(DATASET_PATH)

# 105293 positions evaluated in 40 seconds! Goes to show how resource-heavy Stockfish is.

105000/105293
