In [1]:
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import glob

In [2]:
path = "../Data/DataTrain"

letters = 'abcdefghij'

# Collect the training shards whose file names start with Chess_Jan_<letter>.
# glob returns its matches in arbitrary, filesystem-dependent order, so every
# batch is sorted: the row order of the concatenated frame (and therefore the
# seeded train/test split further down) stays the same from run to run.
csv_files = []

for let in letters:
    csv_files.extend(sorted(glob.glob(f'{path}/Chess_Jan_{let}*')))



In [3]:
# Read only the columns the model needs from every shard and stack them into one
# frame with a fresh 0..n-1 index. 'cp' is kept as raw objects here and converted
# to numbers later, when the features are built.
df_large = pd.concat(
    [
        pd.read_csv(
            csv_path,
            usecols=['board', 'cp', 'white_active', 'white_elo', 'black_elo'],
            dtype={
                'white_elo': 'uint16',
                'black_elo': 'uint16',
                'white_active': 'bool',
                'cp': 'object',
            },
        )
        for csv_path in csv_files
    ],
    ignore_index=True,
)

In [4]:
def fen_str_to_1d_array(fen):
    """
    Encode the piece placement of a FEN string as a flat numeric vector.

    Only the first space-separated field of the FEN (the piece placement) is
    read; the remaining fields (side to move, castling rights, ...) are
    ignored. Squares are emitted in the order they appear in the FEN string,
    rank by rank.

    Args:
        fen (str): The FEN string representing the chess board.

    Returns:
        np.ndarray: 1-d float32 array with one entry per square (64 for a
        well-formed FEN). Empty squares are 0, white pieces are positive and
        black pieces negative: pawn 1, knight 2, bishop 3, rook 4, queen 5,
        king 6.

    Raises:
        KeyError: If the placement field contains a character that is neither
            a digit nor one of the twelve piece letters.

    Example:
        >>> fen_str_to_1d_array('8/8/8/8/8/8/8/K6k w - - 0 1')[-8:]
        array([ 6.,  0.,  0.,  0.,  0.,  0.,  0., -6.], dtype=float32)
    """
    # White pieces carry positive codes; black pieces mirror them with a minus sign.
    white_codes = {'P': 1, 'N': 2, 'B': 3, 'R': 4, 'Q': 5, 'K': 6}
    codes = {**white_codes, **{sym.lower(): -val for sym, val in white_codes.items()}}

    # e.g. 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1' -> placement field only
    placement = fen.split(' ')[0]

    squares = []
    # The '/' rank separators carry no square information, so walk the symbols directly.
    for symbol in placement.replace('/', ''):
        if symbol.isdigit():
            # A digit stands for that many consecutive empty squares.
            squares += [0] * int(symbol)
        else:
            squares.append(codes[symbol])

    return np.array(squares, dtype='float32')

In [None]:
import chess

def get_number_of_pieces(fen_str):
    """
    Build signed, value-weighted material features for a position.

    For each colour and each non-king piece type the feature is

        count / starting_count * piece_value * sign / 2

    where ``sign`` is +1 for White and -1 for Black, the starting counts are
    8, 2, 2, 2, 1 and the piece values are 1, 3, 3.1, 5, 9 for pawn, knight,
    bishop, rook and queen respectively.

    Parameters
    ----------
    fen_str : str
        FEN string of the position; it is parsed with ``chess.Board``.

    Returns
    -------
    np.ndarray, shape (10,)
        Features ordered as
        [white_pawns, white_knights, white_bishops, white_rooks, white_queens,
         black_pawns, black_knights, black_bishops, black_rooks, black_queens].
        Kings are not included.
    """
    position = chess.Board(fen_str)

    # (piece type, number on the board at the start of a game, nominal value)
    piece_table = (
        (chess.PAWN, 8, 1),
        (chess.KNIGHT, 2, 3),
        (chess.BISHOP, 2, 3.1),
        (chess.ROOK, 2, 5),
        (chess.QUEEN, 1, 9),
    )

    # White pieces are positive, black pieces are negative.
    features = [
        len(position.pieces(piece_type, side)) / start_count * value * sign / 2
        for side, sign in ((chess.WHITE, 1), (chess.BLACK, -1))
        for piece_type, start_count, value in piece_table
    ]

    return np.array(features)

In [5]:
# --- Target -----------------------------------------------------------------
# 'cp' was read as raw objects; anything that is not a number becomes NaN.
white_active = df_large['white_active']
cp_raw = pd.to_numeric(df_large['cp'], errors='coerce')

# Rows without a numeric evaluation get a fixed score of +10 when White is to
# move and -10 otherwise (presumably these are forced-mate positions - TODO
# confirm against the data source). Every numeric evaluation is clamped to
# [-9, 9] so those +/-10 rows always sit outside the regular range.
# Done with array operations: assigning into a Series one element at a time is
# far too slow for a frame with tens of millions of rows.
cp = np.where(
    cp_raw.isna(),
    np.where(white_active, 10, -10),
    cp_raw.clip(lower=-9, upper=9),
).astype('float32')

# --- Features ---------------------------------------------------------------
# Each row is the 64-square board encoding followed by the 10 material features.
# The float32 matrix is allocated once and filled in place instead of first
# collecting one float64 array per position in a Python list.
boards = df_large['board'].to_numpy()
X = np.empty((len(boards), 64 + 10), dtype='float32')

for row, fen_str in enumerate(boards):
    X[row, :64] = fen_str_to_1d_array(fen_str)
    X[row, 64:] = get_number_of_pieces(fen_str)

In [6]:
# Preview the concatenated frame; pandas abbreviates the display to the first and last rows.
df_large

Unnamed: 0,white_elo,black_elo,cp,white_active,board
0,1702,1628,0.1,True,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...
1,1702,1628,0.12,False,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
2,1702,1628,0.37,True,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...
3,1702,1628,0.23,False,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...
4,1702,1628,0.15,True,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...
...,...,...,...,...,...
25999995,1104,1213,,False,1kr3nr/p4p2/3p2pp/4p3/4P1PP/1p6/2p5/2K5 b - - ...
25999996,1104,1213,,True,1kr3nr/p4p2/3p2pp/4p3/4P1PP/8/1pp5/2K5 w - - 0 37
25999997,1104,1213,,False,1kr3nr/p4p2/3p2pp/4p3/4P1PP/8/1Kp5/8 b - - 0 37
25999998,1104,1213,,True,1kr3nr/p4p2/3p2pp/4p3/4P1PP/8/1K6/2q5 w - - 0 38


In [7]:
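# NOTE: dead code kept for reference only - the same cp clamping is already applied in the feature-building cell (In [5]).
# for i in range(len(cp)):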

#     if np.isnan(cp[i]) and white_active[i]:
#         cp[i] = 10
#     elif np.isnan(cp[i]) and not white_active[i]:
#         cp[i] = -10
#     elif cp[i] > 9:
#         cp[i] = 9
#     elif cp[i] < -9:
#         cp[i] = -9

In [8]:
# Hold out 20% of the positions for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    cp,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Sanity-check the training matrix: (n_samples, n_features).
# NOTE(review): the recorded output shows 64 columns, but the feature cell concatenates the
# 64-square board vector with the 10 values from get_number_of_pieces - the output looks
# stale; re-run and confirm the width before trusting the numbers below.
X_train.shape

(20800000, 64)

In [10]:
# The target vector should have exactly one entry per training row.
y_train.shape

(20800000,)

In [11]:
# Fit a random-forest regressor that maps the position features to the clamped evaluation.
model = RandomForestRegressor(
    n_estimators=25,
    max_features=30,  # 74 features, good rule of thumb is 1/3 for regression
                      # NOTE(review): a third of 74 would be ~25 - confirm 30 is intended
    n_jobs=-1,        # run in parallel processing
    random_state=0,
    verbose=1,
)
model.fit(X_train, y_train)

In [22]:
def get_current_version():
    """
    Return the first unused version number for the saved random-forest model.

    Probes the current working directory for ``model_RF_1.pkl``,
    ``model_RF_2.pkl``, ... and returns the smallest positive integer N for
    which ``model_RF_N.pkl`` does not exist yet, so that saving under that
    name never overwrites an earlier model.

    Returns
    -------
    int
        Next free version number (1 when no model file exists yet).
    """
    from pathlib import Path

    counter = 1

    # Every candidate must carry the same '.pkl' suffix that the save cell
    # writes. Probing a different suffix would stop the search at 2 as soon as
    # version 1 exists and let later saves overwrite model_RF_2.pkl.
    while Path(f'model_RF_{counter}.pkl').is_file():
        counter += 1

    return counter

In [23]:
# Pick the version number used in the file name of the model saved by the next cell.
version = get_current_version()
version

2

In [24]:
import pickle

import joblib

# Write the fitted forest to a versioned file in the working directory.
# joblib is used for serialisation, at its strongest compression level (9).
filename = f'model_RF_{version}.pkl'
joblib.dump(model, filename, compress=9)

In [17]:
# Reload the model from the file written above (presumably to verify the saved artefact).
# NOTE(review): the recorded run of this cell ended in the MemoryError shown below, so
# model_loaded was never created; the later cells keep using the in-memory `model`.
model_loaded = joblib.load(filename)

MemoryError: Unable to allocate 1.13 GiB for an array with shape (18926589,) and data type {'names': ['left_child', 'right_child', 'feature', 'threshold', 'impurity', 'n_node_samples', 'weighted_n_node_samples', 'missing_go_to_left'], 'formats': ['<i8', '<i8', '<i8', '<f8', '<f8', '<i8', '<f8', 'u1'], 'offsets': [0, 8, 16, 24, 32, 40, 48, 56], 'itemsize': 64}

In [13]:
# Predict the held-out positions with the in-memory model (not the reloaded copy).
y_pred = model.predict(X_test)

In [14]:
# Mean absolute error on the held-out set, in the same units as the clamped cp target.
abs_errors = np.abs(y_pred - y_test)
abs_errors.sum() / len(y_test)

2.1844157396157975

In [None]:
def predict_DT(model, fen, move_number=7, stochastic=True):
    """
    Choose a move for the side to move by scoring every legal reply with ``model``.

    Each legal move is played on a scratch board and the resulting position is
    turned into the same feature vector used for training (board encoding from
    ``fen_str_to_1d_array`` followed by ``get_number_of_pieces``). The model's
    prediction is read as an evaluation from White's point of view: White
    prefers the highest value, Black the lowest. A move that delivers checkmate
    is returned immediately, and captures get a flat 0.5 bonus in favour of the
    side making them.

    Parameters
    ----------
    model : object
        Fitted regressor exposing ``predict(X)`` for a 2-d feature array.
    fen : str
        FEN string of the current position.
    move_number : int, default 7
        Current move number; limits how far from the best move the stochastic
        mode may stray (see below).
    stochastic : bool, default True
        If False, always return the best-scoring move. If True, draw a uniform
        random number and pick
          * the best move with probability 65%,
          * the second-best with 20%,
          * the third-best with 12.5%,
          * the fourth-best with 2.5%,
        except that ``move_number > 7`` always plays the best move,
        ``move_number > 5`` never goes below the second-best and
        ``move_number > 3`` never goes below the third-best.

    Returns
    -------
    chess.Move
        The selected move. When fewer candidates exist than the drawn rank, the
        lowest-ranked available move is returned instead.

    Raises
    ------
    IndexError
        If the position has no legal moves.
    """
    board = chess.Board(fen)
    legal_moves_list = list(board.legal_moves)
    if not legal_moves_list:
        raise IndexError('predict_DT: the position has no legal moves')

    white_to_move = board.turn
    feature_rows = []
    capture_flags = []

    for move in legal_moves_list:
        # is_capture must be asked while the move is still legal, i.e. before the push.
        capture_flags.append(board.is_capture(move))

        board.push(move)
        if board.is_checkmate():
            return move  # Always make a move which gives checkmate if possible.
        next_fen = board.fen()
        board.pop()

        feature_rows.append(
            np.concatenate((fen_str_to_1d_array(next_fen), get_number_of_pieces(next_fen)))
        )

    # One batched call instead of one call per move. The result is flattened to
    # 1-d on purpose: with a column of length-1 predictions argsort would run
    # along the wrong axis and every index would come out as 0.
    evals = np.array(model.predict(np.vstack(feature_rows)), dtype='float64').reshape(-1)

    # Flat capture bonus in favour of the mover (to be replaced by the captured
    # piece's value eventually).
    evals[np.array(capture_flags)] += 0.5 if white_to_move else -0.5

    # argsort is ascending: that is already best-first for Black (lowest White
    # evaluation first); White needs the order reversed.
    order = np.argsort(evals)
    if white_to_move:
        order = order[::-1]
    ranked_moves = [legal_moves_list[idx] for idx in order]

    if not stochastic:  # if not using stochastic mode return best move
        return ranked_moves[0]

    sample = np.random.random_sample()

    if sample <= 0.65 or move_number > 7:  # 65% chance for best move
        rank = 0
    elif sample <= 0.85 or move_number > 5:  # 20% chance for second-best move
        rank = 1
    elif sample <= 0.975 or move_number > 3:  # 12.5% chance for third-best move
        rank = 2
    else:  # 2.5% chance for fourth-best move
        rank = 3

    # Positions with fewer than four legal moves fall back to the worst available one.
    return ranked_moves[min(rank, len(ranked_moves) - 1)]