In [1]:
import chess
import pandas as pd
import numpy as np
import time
import positional_features
from IPython.display import clear_output
from stockfish import Stockfish

# --- Configuration -----------------------------------------------------------
# Input CSV with one row per game (read by the loading cell).
CSV_FILE_PATH = "../data/game_data.csv"
# Account whose moves are being collected; compared against the 'white_id' column.
USER = "matei_popescu1510"

# Raw string: the Windows path contains backslashes ("\P", "\s") that are not
# valid escape sequences. A plain literal only works by accident and triggers a
# SyntaxWarning on Python 3.12+.
STOCKFISH_PATH = r"C:\Program Files\stockfish\stockfish-windows-x86-64-avx2.exe"
# Search depth handed to the Stockfish wrapper.
STOCKFISH_DEPTH = 15

stockfish = Stockfish(path=STOCKFISH_PATH, depth=STOCKFISH_DEPTH)

In [2]:
# Load the raw game records. Later cells iterate over the rows and read the
# 'moves', 'opening', player-id and elo columns.
game_data = pd.read_csv(CSV_FILE_PATH)
# Accumulator keyed by FEN string; filled in by the game-replay loop.
positions = {}

In [None]:
# Replay every game and record, for each position in which it is the user's
# turn, which move was played and how often.
#
# `positions` maps a FEN string to:
#   'opening'   - the 'opening' value of the first game that reached the position
#   'elo_diffs' - one entry per visit: white_elo - black_elo, sign-flipped when
#                 the user's side is black
#   'moves'     - {uci_move: number of times it was played from this position}
#
# The accumulator is rebuilt from scratch here so that re-running this cell
# (for example after an interrupted run) never double-counts games.
positions = {}

for _, row in game_data.iterrows():
    board = chess.Board()
    moves = row['moves'].split()

    # Any game in which USER is not the white player is treated as a game
    # played with the black pieces.
    user_side = chess.WHITE if row['white_id'] == USER else chess.BLACK

    elo_diff = row['white_elo'] - row['black_elo']
    if user_side == chess.BLACK:
        elo_diff = -elo_diff

    opening = row['opening']

    for move in moves:
        # Only positions where the user is to move are recorded; the other
        # side's moves are just replayed to keep the board in sync.
        if board.turn == user_side:
            fen = board.fen()
            entry = positions.get(fen)
            if entry is None:
                entry = positions[fen] = {
                    'opening': opening,
                    'elo_diffs': [],
                    'moves': {}
                }
            entry['moves'][move] = entry['moves'].get(move, 0) + 1
            entry['elo_diffs'].append(elo_diff)

        board.push_uci(move)

In [None]:
# Pass 1: attach to every position the relative frequency of each recorded
# move, rounded to three decimals, under the 'move_prob' key.
for stats in positions.values():
    move_counts = stats['moves']
    n_played = sum(move_counts.values())
    stats['move_prob'] = {
        uci: round(times / n_played, 3) for uci, times in move_counts.items()
    }

# Pass 2: flatten into one record per (position, move) pair.
data = []
for fen, stats in positions.items():
    diffs = stats['elo_diffs']
    # Floor division: the average is an integer rounded toward negative infinity.
    mean_diff = sum(diffs) // len(diffs)
    data.extend(
        {
            'fen': fen,
            'move': uci,
            'prob': prob,
            'opening': stats['opening'],
            'elo_diff': mean_diff
        }
        for uci, prob in stats['move_prob'].items()
    )

dataset = pd.DataFrame(data)

In [6]:
# Destination of the generated dataset (relative path).
DATASET_PATH = '../data/dataset.csv'
# to_csv is called with its default arguments, so the DataFrame index is
# written as the first, unnamed column of the file.
dataset.to_csv(DATASET_PATH)
# Bare expression: renders the DataFrame as the cell output.
dataset

Unnamed: 0,fen,move,prob,opening,elo_diff
0,rnbqkbnr/pppppppp/8/8/2P5/8/PP1PPPPP/RNBQKBNR ...,g7g6,0.675,A36,-5
1,rnbqkbnr/pppppppp/8/8/2P5/8/PP1PPPPP/RNBQKBNR ...,c7c6,0.169,A36,-5
2,rnbqkbnr/pppppppp/8/8/2P5/8/PP1PPPPP/RNBQKBNR ...,e7e6,0.039,A36,-5
3,rnbqkbnr/pppppppp/8/8/2P5/8/PP1PPPPP/RNBQKBNR ...,g8f6,0.039,A36,-5
4,rnbqkbnr/pppppppp/8/8/2P5/8/PP1PPPPP/RNBQKBNR ...,c7c5,0.039,A36,-5
...,...,...,...,...,...
105288,r1b2rk1/pq1p1p1p/6p1/2pp4/2P5/4PNP1/R2QBPKP/R7...,d2d5,1.000,A45,22
105289,r1b2rk1/p1qp1p1p/6p1/2pQ4/2P5/4PNP1/R3BPKP/R7 ...,d5a8,1.000,A45,22
105290,Q4rk1/pbqp1p1p/6p1/2p5/2P5/4PNP1/R3BPKP/R7 w -...,a8a7,1.000,A45,22
105291,5rk1/Q1qp1p1p/6p1/2p5/2P5/4PbP1/R3BPKP/R7 w - ...,e2f3,1.000,A45,22
