This notebook contains the code needed to divide chess games into opening, middle, and end games. It also contains the code for extracting features from each chess move.

# Initializations

In [None]:
!pip install chess

In [None]:
from glob import glob
import shutil
import glob
import json
import math
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
from statistics import stdev
import scipy.stats as stats
import networkx as nx
import pandas as pd
import codecs
import requests
from bs4 import BeautifulSoup
import re
import pickle
from tqdm import tqdm
tqdm.pandas()
import random
from sklearn.feature_extraction.text import TfidfVectorizer
import csv
from sklearn.metrics import classification_report, precision_score,recall_score
from sklearn.model_selection import train_test_split
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import warnings
import email # for handling email data format

# Ignore all warnings
warnings.filterwarnings('ignore')

# Dividing chess games into opening, middle, end games

In [None]:
import chess
import ast
def is_both_castlings_used(fen):
    """Tell whether each colour has lost at least one of its castling rights.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        True when White is missing its kingside or queenside castling right
        (or both) AND the same holds for Black; False otherwise.
    """
    position = chess.Board(fen)

    def lost_a_right(color):
        # A side "lost a right" unless it still holds both of them.
        keeps_kingside = position.has_kingside_castling_rights(color)
        keeps_queenside = position.has_queenside_castling_rights(color)
        return not (keeps_kingside and keeps_queenside)

    return lost_a_right(chess.WHITE) and lost_a_right(chess.BLACK)

def count_legal_moves(fen):
    """Count the legal king moves of both colours in a position.

    The side to move is forced to Black and then to White on a private copy
    of the position, and for each the legal moves that start on that
    colour's king square are counted.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        The number of White king moves plus the number of Black king moves.
    """
    position = chess.Board(fen)
    # King squares are looked up before the side to move is overridden.
    king_squares = {
        chess.WHITE: position.king(chess.WHITE),
        chess.BLACK: position.king(chess.BLACK),
    }

    combined = 0
    for color in (chess.BLACK, chess.WHITE):
        position.turn = color  # pretend it is this colour's turn
        combined += len(
            [candidate for candidate in position.legal_moves
             if candidate.from_square == king_squares[color]]
        )
    return combined

def count_pieces(fen):
    """Count every piece on the board, kings and pawns included.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        Total number of White and Black pieces in the position.
    """
    position = chess.Board(fen)
    return sum(
        len(position.pieces(piece_type, color))
        for color in (chess.WHITE, chess.BLACK)
        for piece_type in chess.PIECE_TYPES
    )

# Thresholds read by move_determine when it labels a position.
START_MOVE_THRESHOLD = 16  # "start" label is only possible while move_count <= this value
START_REMAINING_PIECE_THRESHOLD = 32-8  # "start" label also needs more than this many pieces on the board (count_pieces includes kings)
END_REMAINING_PIECE_THRESHOLD = 12  # piece count at or below which the end-game test fires, once START_GAME_OVER is set
END_COMBINED_LEGAL_MOVES_COUNT_KINGS = 8  # combined king legal-move count (count_legal_moves) at or above which the end game starts
END_GAME_MOVE_THRESHOLD = 0.55  # share of the game (move_count/total_moves) above which the end game starts; the labelling loop reassigns this per game
def move_determine(fen, move_count, total_moves):
    """Label one position as start (1), middle (2) or end game (3).

    The function is stateful: it reads and writes the module-level flags
    START_GAME_OVER and END_GAMES_ENTERED, which the caller must reset
    before each new game.

    Args:
        fen: FEN string of the position after the move was played.
        move_count: 1-based index of the move within the game.
        total_moves: value the game progress fraction is computed against.

    Returns:
        1 for a start-game position, 2 for a middle-game position,
        3 for an end-game position.
    """
    global END_GAMES_ENTERED, START_GAME_OVER

    game_fraction = move_count / total_moves
    # Once the end game has been reached, every later move stays there.
    if END_GAMES_ENTERED:
        return 3

    pieces_left = count_pieces(fen)
    king_mobility = count_legal_moves(fen)

    still_opening = (
        move_count <= START_MOVE_THRESHOLD
        and not is_both_castlings_used(fen)
        and pieces_left > START_REMAINING_PIECE_THRESHOLD
    )
    if still_opening:
        return 1

    # NOTE(review): the parentheses spell out how Python groups the original
    # unparenthesised and/or chain: START_GAME_OVER gates only the
    # piece-count test. Confirm that this is the intended rule.
    reached_endgame = (
        (START_GAME_OVER and pieces_left <= END_REMAINING_PIECE_THRESHOLD)
        or king_mobility >= END_COMBINED_LEGAL_MOVES_COUNT_KINGS
        or game_fraction > END_GAME_MOVE_THRESHOLD
    )
    if reached_endgame:
        END_GAMES_ENTERED = True
        return 3

    START_GAME_OVER = True
    return 2

def check_list_validity(A):
  """Check that a per-move label sequence is a plausible game.

  A sequence is valid when it starts with 1 (start game), ends with
  3 (end game) and never decreases from one move to the next.

  Args:
    A: sequence of segment labels, one per move.

  Returns:
    True when the sequence is valid; False otherwise, including for an
    empty sequence (a game without moves has no labels to validate).
  """
  # Guard the A[0] / A[-1] lookups below: an empty game is simply invalid.
  if len(A) == 0:
    return False
  if A[0] != 1 or A[-1] != 3:
    return False
  return all(earlier <= later for earlier, later in zip(A, A[1:]))




In [None]:
# Label every move of every game as start (1), middle (2) or end game (3)
# and store the list of labels in the 'move_category' column.
df = pd.read_csv('chess_datasets//chess_games.csv') # replace with appropriate file name
new_col = 'move_category'
# Games that fail validation below keep this empty-string placeholder.
df[new_col] = ''

# Write a checkpoint of the frame every `save_interval` rows.
save_interval = 1000
for i, row in tqdm(df.iterrows(), total=df.shape[0]):
  # 'moves_list' is stored as the text of a Python list of UCI move strings.
  moves = ast.literal_eval(row['moves_list'])
  # print(moves)
  total_moves = row['total_moves']
  board = chess.Board()
  A = []  # segment label of each move of this game
  # Reset the per-game state that move_determine keeps in module-level globals.
  START_GAME_OVER = False
  END_GAMES_ENTERED = False
  # Games with fewer than 50 total moves use a later end-game cut-off.
  if total_moves<50: END_GAME_MOVE_THRESHOLD = 0.65
  else: END_GAME_MOVE_THRESHOLD = 0.55
  for j in range(len(moves)):
      move_uci = moves[j]
      move = chess.Move.from_uci(move_uci)
      # The position is classified AFTER the move has been played.
      board.push(move)
      cur_board = board.fen()
      A.append(move_determine(cur_board, j+1, total_moves))
      # print(move,cur_board,)
  # Skip games whose labels do not run 1 ... 3 without decreasing.
  # NOTE(review): `continue` also skips the checkpoint test below for this row.
  if not check_list_validity(A):
    continue
  df.at[i, new_col] = A
  # print(A)
  if (i+1) % save_interval == 0:
    print(A)
    # NOTE(review): the frame is read from 'chess_datasets//chess_games.csv' but
    # written to 'chess_games.csv' in the working directory - confirm that a
    # later cell reads the file this cell writes.
    df.to_csv('chess_games.csv', index=False,header=True)  # Save DataFrame to CSV file
    print(f"Saved DataFrame after {i+1} iterations.")
# Final save once every game has been processed.
df.to_csv('chess_games.csv', index=False,header=True)


# Creating feature list for chess moves

In [None]:
import ast
import chess
import chess.engine
!chmod +x stockfish/stockfish-ubuntu-x86-64-avx2
# Start one Stockfish process over UCI; extract_stockfish_score and
# extract_stockfish_move below reuse this module-level `engine`.
# NOTE(review): no engine.quit() is visible in this part of the notebook -
# confirm the process is shut down once feature extraction is finished.
engine = chess.engine.SimpleEngine.popen_uci("./stockfish/stockfish-ubuntu-x86-64-avx2")

In [None]:
# Integer code of each piece symbol; 0 is used elsewhere for "no piece".
# Black pieces (lower case) get odd codes, White pieces (upper case) even codes.
piece_map = {
    'p': 1,
    'n': 3,
    'b': 5,
    'r': 7,
    'q': 9,
    'k': 11,
    'P': 2,
    'N': 4,
    'B': 6,
    'R': 8,
    'Q': 10,
    'K': 12,
}

# File letter -> 1-based file number.
square_map = {
    'a': 1,
    'b': 2,
    'c': 3,
    'd': 4,
    'e': 5,
    'f': 6,
    'g': 7,
    'h': 8,
}

# Parity of the 0-based move index -> colour that played the move.
color_map = {
    0: 'white',
    1: 'black',
}

# Segment label produced by move_determine -> readable name.
segment_map = {
    1: 'start',
    2: 'mid',
    3: 'end',
}

In [None]:
def material_balance(fen):
    """Summarise the material situation from the side to move's viewpoint.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        A tuple ``(balance, light_pieces, heavy_pieces)`` where
        ``balance`` is own minus opponent material using the weights
        pawn 1, knight 3, bishop 3, rook 5, queen 9 (kings are ignored),
        ``light_pieces`` is the number of own knights and bishops, and
        ``heavy_pieces`` is the number of own rooks and queens.
    """
    position = chess.Board(fen)
    mover = position.turn

    # Per-type piece tallies for the side to move and for its opponent.
    own = dict.fromkeys(chess.PIECE_TYPES, 0)
    other = dict.fromkeys(chess.PIECE_TYPES, 0)
    for piece in position.piece_map().values():
        tally = own if piece.color == mover else other
        tally[piece.piece_type] += 1

    weights = (
        (chess.PAWN, 1),
        (chess.KNIGHT, 3),
        (chess.BISHOP, 3),
        (chess.ROOK, 5),
        (chess.QUEEN, 9),
    )
    balance = sum((own[kind] - other[kind]) * weight for kind, weight in weights)

    minor_count = own[chess.KNIGHT] + own[chess.BISHOP]
    major_count = own[chess.ROOK] + own[chess.QUEEN]
    return balance, minor_count, major_count

def count_open_files(fen):
    """Count the open files of a position.

    A file is open when no pawn of either colour stands on it.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        Number of files (0-8) without any pawn.
    """
    board = chess.Board(fen)

    # chess.BB_FILES holds one bitboard per file (a-h). The per-square
    # sliding-attack table chess.BB_FILE_MASKS is indexed by square, not by
    # file, so it is not the right mask to test a whole file with.
    return sum(1 for file_mask in chess.BB_FILES if (board.pawns & file_mask) == 0)
def count_semi_open_files(fen):
    """Count the files that are semi-open for the side to move.

    A file counts when the side to move has no pawn on it while the
    opponent has at least one pawn on it.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        Number of such files (0-8).
    """
    position = chess.Board(fen)
    mover = position.turn

    # Files occupied by at least one pawn of each colour.
    own_pawn_files = {
        chess.square_file(sq) for sq in position.pieces(chess.PAWN, mover)
    }
    enemy_pawn_files = {
        chess.square_file(sq) for sq in position.pieces(chess.PAWN, not mover)
    }
    return len(enemy_pawn_files - own_pawn_files)

def find_items_near_king_position(fen):
    """Count the material standing close to the side to move's king.

    "Close" means a file distance plus rank distance of at most 3 from the
    king square. Kings themselves are never counted.

    Args:
        fen: FEN string of the position to inspect.

    Returns:
        A tuple ``(own_pieces, own_pawns, opponent_pieces, opponent_pawns)``
        where the two "pieces" counts exclude pawns and kings.
    """
    position = chess.Board(fen)
    mover = position.turn

    king_sq = position.king(mover)
    king_file = chess.square_file(king_sq)
    king_rank = chess.square_rank(king_sq)

    own_officers = own_pawns = enemy_officers = enemy_pawns = 0
    for sq, piece in position.piece_map().items():
        if piece.piece_type == chess.KING:
            continue
        reach = abs(chess.square_file(sq) - king_file) + abs(chess.square_rank(sq) - king_rank)
        if reach > 3:
            continue

        is_pawn = piece.piece_type == chess.PAWN
        if piece.color == mover:
            if is_pawn:
                own_pawns += 1
            else:
                own_officers += 1
        else:
            if is_pawn:
                enemy_pawns += 1
            else:
                enemy_officers += 1

    return own_officers, own_pawns, enemy_officers, enemy_pawns

def extract_stockfish_score(fen, mate_score=100000):
    """Evaluate a position with Stockfish from White's point of view.

    This is the single definition of the function; it replaces two
    same-named definitions of which only the later one was ever active.
    That definition converted the score with ``int(str(score))``, which
    raises ValueError for forced-mate scores (rendered like ``#+3``), and
    printed a debug line on every call.

    Args:
        fen: FEN string of the position to evaluate.
        mate_score: centipawn magnitude that stands in for a forced mate;
            passed to python-chess' ``Score.score`` so that mate scores
            become large finite integers.

    Returns:
        Integer evaluation from White's perspective: centipawns, or a value
        derived from ``mate_score`` when the engine reports a forced mate.
    """
    board = chess.Board(fen)
    # Uses the module-level `engine`; each call is limited to 0.1 s.
    info = engine.analyse(board, chess.engine.Limit(time=0.1))
    return info["score"].white().score(mate_score=mate_score)

def extract_stockfish_move(fen):
    """Ask Stockfish for its preferred move in a position.

    Args:
        fen: FEN string of the position to analyse.

    Returns:
        UCI string of the first move of the engine's principal variation.
    """
    position = chess.Board(fen)
    # Uses the module-level `engine`; each call is limited to 0.1 s.
    analysis = engine.analyse(position, chess.engine.Limit(time=0.1))
    principal_variation = analysis['pv']
    return principal_variation[0].uci()

def fen_to_vector(fen):
    """Encode a position as a flat list of 64 integer piece codes.

    Args:
        fen: FEN string of the position to encode.

    Returns:
        List of length 64 in chess.SQUARES order; each entry is the
        ``piece_map`` code of the piece on that square, or 0 when the
        square is empty.
    """
    position = chess.Board(fen)
    occupied = position.piece_map()
    return [
        piece_map[occupied[sq].symbol()] if sq in occupied else 0
        for sq in chess.SQUARES
    ]

def analyze_move(fen, uci_move):
    """Describe a move played from a given position.

    Args:
        fen: FEN string of the position BEFORE the move is played.
        uci_move: the move, either as a ``chess.Move`` or as a UCI string.

    Returns:
        A tuple ``(piece_moved, from_square, to_square, captured_piece)``:
        the symbol of the moving piece, the origin and destination square
        names, and the captured ``chess.Piece`` (``None`` when the move
        captures nothing).
    """
    board = chess.Board(fen)

    # Accept both a Move object (what the notebook passes) and a UCI string.
    move = chess.Move.from_uci(uci_move) if isinstance(uci_move, str) else uci_move

    piece_moved = board.piece_at(move.from_square).symbol()
    from_square = chess.square_name(move.from_square)
    to_square = chess.square_name(move.to_square)

    if board.is_en_passant(move):
        # The captured pawn does not stand on the destination square, so
        # looking at that square would report "no capture".
        captured_piece = chess.Piece(chess.PAWN, not board.turn)
    else:
        # The move has not been pushed, so the destination square still
        # holds whatever is about to be captured.
        captured_piece = board.piece_at(move.to_square)

    return piece_moved, from_square, to_square, captured_piece

def get_features(cur_board, move, next_board):
    """Build the feature vector of one move.

    Args:
        cur_board: FEN string of the position before the move.
        move: the move played from ``cur_board``.
        next_board: FEN string of the position after the move.

    Returns:
        A tuple ``(eval_score, best_move, feature_list)``: the Stockfish
        score of ``next_board``, the engine's suggested move for
        ``cur_board``, and the feature list. The list holds, in order, the
        64 square codes, 7 move descriptors, 3 material values, the open
        and semi-open file counts, 4 king-neighbourhood counts and the
        evaluation score.
    """
    features = list(fen_to_vector(cur_board))

    piece_moved, from_square, to_square, captured_piece = analyze_move(cur_board, move)
    halfmove_clock = int(cur_board.split()[4])  # fifth FEN field

    # Move descriptors: piece, origin file/rank, destination file/rank,
    # captured piece code (0 when nothing is captured), half-move clock.
    features.extend([
        piece_map[piece_moved],
        square_map[from_square[0]],
        int(from_square[1]),
        square_map[to_square[0]],
        int(to_square[1]),
        piece_map[captured_piece.symbol()] if captured_piece else 0,
        halfmove_clock,
    ])

    # Material balance, light pieces, heavy pieces.
    features.extend(material_balance(cur_board))
    features.append(count_open_files(cur_board))
    features.append(count_semi_open_files(cur_board))

    # Own pieces/pawns and opponent pieces/pawns around the king.
    features.extend(find_items_near_king_position(cur_board))

    # Best move is asked for the position before the move, the score for
    # the position after it.
    best_move = extract_stockfish_move(cur_board)
    eval_score = extract_stockfish_score(next_board)
    features.append(eval_score)

    return eval_score, best_move, features

def get_optimal_move_percentages(move_types, optimal_moves, player_map):
  """Compute, per player and game segment, the share of optimal moves.

  Each percentage is the number of optimal moves a player made in a
  segment divided by the number of moves that player actually made in
  that segment. Dividing by half of the segment's total length instead
  over- or under-states the share whenever a segment has an odd number of
  moves, and can yield values above 100.

  Args:
    move_types: segment label of every move (1 start, 2 mid, 3 end).
    optimal_moves: 1 where the played move matched the suggested move,
      any other value otherwise; same length as ``move_types``.
    player_map: maps move-index parity (0 or 1) to 'human' or 'ai'.

  Returns:
    A tuple ``(human_start, human_mid, human_end, ai_start, ai_mid,
    ai_end)`` of percentages rounded to two decimals. A segment in which
    a player made no move yields 0.0.
  """
  # Parity of the 0-based move indices that belong to the human player.
  human_parity = 0 if player_map[0] == 'human' else 1

  # Tallies indexed by segment label - 1.
  played = {'human': [0, 0, 0], 'ai': [0, 0, 0]}
  optimal = {'human': [0, 0, 0], 'ai': [0, 0, 0]}

  for i, segment in enumerate(move_types):
    if segment not in (1, 2, 3):
      continue  # unknown labels are ignored
    side = 'human' if i % 2 == human_parity else 'ai'
    played[side][segment - 1] += 1
    if optimal_moves[i] == 1:
      optimal[side][segment - 1] += 1

  def percentage(side, slot):
    total = played[side][slot]
    if total == 0:
      return 0.0  # no move by this player in this segment
    return round(optimal[side][slot] * 100 / total, 2)

  return tuple(percentage(side, slot) for side in ('human', 'ai') for slot in range(3))


In [None]:
# Build one feature record per move (collected in ROWS) and add per-game
# optimal-move percentages to the games frame.
df = pd.read_csv('chess_datasets//chess_games.csv') # replace with appropriate file name

ROWS = []
for index, row in tqdm(df.iterrows(), total=df.shape[0]):
  # Games rejected by the labelling step have a blank 'move_category', which
  # comes back from CSV as NaN (or an empty string). ast.literal_eval cannot
  # parse either, so those games are skipped instead of aborting the run.
  raw_move_types = row['move_category']
  if not isinstance(raw_move_types, str) or not raw_move_types.strip():
    continue
  moves = ast.literal_eval(row['moves_list'])
  move_types = ast.literal_eval(raw_move_types)
  ai_player = row['ai_player']
  # Map move-index parity (0 = White's moves, 1 = Black's moves) to the player type.
  if ai_player == 'black': player_map = {0:'human',1:'ai'}
  else: player_map = {1:'human',0:'ai'}
  board = chess.Board()
  optimal_moves = []  # 1/0 flag per move: did it match the engine suggestion?
  for i in range(len(moves)):
      D = {}
      D['game_id'] = row['game_id']
      D['move_count'] = i+1
      D['color'] =  color_map[i%2]
      D['player_type'] =  player_map[i%2]
      move_uci = moves[i]
      move = chess.Move.from_uci(move_uci)
      # Capture the position before the move; features describe the move
      # from this position, the evaluation describes the position after it.
      cur_board = board.fen()
      D['move_name'] = moves[i]
      D['move_type'] = segment_map[move_types[i]]
      board.push(move)
      D['eval_score'],D['suggested_move'],D['feature_list']= get_features(cur_board, move,board.fen())
      if D['suggested_move'] == D['move_name']:
        D['played_optimal_move'] = 1
      else:
        D['played_optimal_move'] = 0
      optimal_moves.append(D['played_optimal_move'])
      ROWS.append(D)
  human_optimal_start, human_optimal_mid, human_optimal_end, ai_optimal_start, ai_optimal_mid, ai_optimal_end =get_optimal_move_percentages(move_types, optimal_moves, player_map)
  df.at[index, 'human_optimal_start'] = human_optimal_start
  df.at[index, 'human_optimal_mid'] = human_optimal_mid
  df.at[index, 'human_optimal_end'] = human_optimal_end
  df.at[index, 'ai_optimal_start'] = ai_optimal_start
  df.at[index, 'ai_optimal_mid'] = ai_optimal_mid
  df.at[index, 'ai_optimal_end'] = ai_optimal_end
df.to_csv('chess_games_features.csv',index=False, header=True)