In [1]:
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import chess

import glob

RF1:  
piececounts - `len(board.pieces(piece, color)) / max_starting_pieces[i] * values[i] * (-1)**j`  
estimators: 100  
files: a to e  
criterion: MSE  
model = RandomForestRegressor(
        n_estimators = 25, 
        random_state = 0,)

RF2:  
piececounts - `len(board.pieces(piece, color)) / max_starting_pieces[i] * values[i] * (-1)**j / 2`  
estimators: 125  
files: a to g  
criterion: MSE  
model = RandomForestRegressor(
        n_estimators = 25, 
        random_state = 0,)

RF3:  
piececounts - `len(board.pieces(piece, color)) / max_starting_pieces[i] * values[i] * (-1)**j / 2`  
estimators: 50  
files: a through k  
criterion: MSE  
model = RandomForestRegressor(
        n_estimators = 25, 
        random_state = 0,)

RF4:  
piececounts - see above  
estimators: 25  
files: a through m  
criterion: MAE (`absolute_error`)  
model = RandomForestRegressor(
        n_estimators = 25, 
        random_state = 0,
        criterion = 'absolute_error', # L1 Loss for simplification
        verbose = 1,
        n_jobs = -1, # run in parallel processing
        max_features = 30, # 74 features, good rule of thumb is 1/3 for regression
)

In [2]:
def get_current_version():
    """Return the lowest model number that has no saved model file yet.

    Numbers are probed upwards from 1 in the current working directory. A
    number counts as taken when either ``model_RF_<n>.pkl`` or
    ``model_RF_<n>.joblib`` exists: models in this notebook are written with
    the ``.joblib`` suffix, so checking ``.pkl`` alone would keep handing out
    the same number and let a new save overwrite an earlier one.

    Returns
    -------
    int
        The first unused model number (1 when no model file exists).
    """
    from pathlib import Path  # local import keeps this cell self-contained

    model_suffixes = ('.pkl', '.joblib')

    def _is_taken(number):
        # True when a model file with this number exists under any known suffix.
        return any(Path(f'model_RF_{number}{suffix}').is_file() for suffix in model_suffixes)

    counter = 1
    while _is_taken(counter):  # ensure that no files are overwritten
        counter += 1

    return counter

# First model number without an existing model file, evaluated once when this cell runs.
MODEL_NUMBER = get_current_version()
MODEL_NUMBER

4

In [3]:
# Directory that holds the training CSV files, relative to this notebook.
path = "../Data/DataTrain"

# One entry per file-name letter to include; files are matched as Chess_Jan_<letter>*.
letters = 'abcdefghijklm'

# glob returns matches in file-system order, which is not guaranteed to be
# stable. Sorting fixes the row order of the concatenated data so that the
# seeded train/test split selects the same rows on every machine.
csv_files = [
    csv_path
    for let in letters
    for csv_path in sorted(glob.glob(f'{path}/Chess_Jan_{let}*'))
]

In [4]:
# Read the three training columns from every selected CSV and stack them into
# one frame with a fresh 0..n-1 index.
df_large = pd.concat(
    (pd.read_csv(f,
                 usecols=['board', 'cp', 'white_active'],#, 'white_elo', 'black_elo'], 
                 # 'cp' is read as raw text; it is converted with pd.to_numeric before preprocessing.
                 # NOTE(review): the elo dtypes refer to columns currently left out of usecols — confirm pandas ignores them.
                 dtype = {'white_elo': 'uint16', 'black_elo': 'uint16', 'white_active': 'bool', 'cp': 'object'},
                ) for f in csv_files), 
    ignore_index = True,
)

In [5]:
import chess_SK_lib as lib_SK

In [6]:
# Build the model inputs X and targets y from the board strings, the 'cp' scores
# (presumably centipawn evaluations — confirm) and the side-to-move flags.
# errors='coerce' turns every 'cp' entry that is not a plain number into NaN.
# NOTE(review): how preprocess_XY treats those NaN targets is not visible here — confirm.
X, y = lib_SK.preprocess_XY(df_large['board'], pd.to_numeric(df_large['cp'], errors='coerce'), df_large['white_active'])

100%|██████████| 1800000/1800000 [02:04<00:00, 14428.17it/s]


In [7]:
# Sanity check: (number of positions, number of features per position).
X.shape

(1800000, 74)

In [9]:
import joblib

# Write the preprocessed inputs and targets to disk with compression level 3.
# NOTE(review): nothing in this notebook reads these files back — presumably they
# are meant to spare later sessions the preprocessing step; confirm.
joblib.dump(X, 'data_X_large.joblib', compress=3)
joblib.dump(y, 'data_y_large.joblib', compress=3)

['data_y_large.joblib']

In [10]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [11]:
# Random forest regressor for the evaluation target, configured as the RF4 run.
model = RandomForestRegressor(
    n_estimators = 25, 
    random_state = 0,
    criterion = 'absolute_error', # L1 loss. NOTE(review): scikit-learn documents this criterion as significantly slower to train than 'squared_error', and the recorded run of this cell ended in KeyboardInterrupt — confirm it is feasible on this many rows.
    verbose = 1,
    n_jobs = -1, # use all available cores
    max_features = 30, # 30 of the 74 features (~40%); the usual 1/3 rule of thumb for regression would be about 25
)
# Fit on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.


KeyboardInterrupt: 

In [None]:
# Predict the target for every held-out row.
y_pred = model.predict(X_test)

In [None]:
# Mean absolute error on the held-out set: summed |prediction - target| divided by the number of test rows.
np.sum(np.abs(y_pred - y_test)) / len(y_test)

In [None]:
import joblib

# Save the fitted model under the number chosen by get_current_version().
# MODEL_NUMBER is the variable this notebook defines for that number; the
# previously used name `version` is not defined anywhere and raised NameError.
filename = f'model_RF_{MODEL_NUMBER}.joblib'

joblib.dump(model, filename, compress=3)

In [None]:
# Load the saved model back from `filename`; the game loop below plays with this copy.
model_loaded = joblib.load(filename)

In [None]:
def predict_DT(model, fen, move_number = 5, stochastic = True):
    """Choose a move for the side to move in ``fen`` using a fitted evaluation model.

    Every legal move is played on a scratch board, the resulting position is
    turned into one feature row, and all rows are scored with a single
    ``model.predict`` call. Scores are read as evaluations from White's point
    of view, so White prefers the highest score and Black the lowest. A move
    that delivers checkmate is returned immediately without consulting the model.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)`` that returns one value per row.
    fen : str
        Position to move from, in FEN notation.
    move_number : int, default 5
        Limits how far down the ranking the random choice may go: above 7 the
        best move is always played, above 5 nothing worse than the second-best,
        above 3 nothing worse than the third-best.
    stochastic : bool, default True
        When False the best-ranked move is always returned.

    Returns
    -------
    numpy.ndarray
        One-element object array holding the chosen ``chess.Move``. The array
        wrapper is kept so that callers indexing the result with ``[0]`` keep
        working; the checkmate shortcut uses the same wrapper.

    Raises
    ------
    ValueError
        If the position has no legal moves.
    """
    board = chess.Board(fen)
    white_to_move = board.turn
    candidate_moves = list(board.legal_moves)

    if not candidate_moves:
        raise ValueError(f'no legal moves available in position: {fen}')

    feature_rows = []
    capture_bonus = []

    for move in candidate_moves:
        # Flat bonus for captures, signed in favour of the side making the move.
        # It has to be read before the move is pushed onto the board.
        if board.is_capture(move):
            capture_bonus.append(0.5 if white_to_move else -0.5) # Modify to add piece value eventually
        else:
            capture_bonus.append(0.0)

        board.push(move)

        if board.is_checkmate():
            return np.array([move], dtype=object) # Always make a move which gives checkmate if possible.

        # NOTE(review): fen_str_to_1d_array and get_number_of_pieces are neither defined
        # nor imported in this notebook — presumably they live in chess_SK_lib; confirm.
        position_fen = board.fen()
        fen_array = fen_str_to_1d_array(position_fen)
        pieces_counts = get_number_of_pieces(position_fen)
        feature_rows.append(np.concatenate((fen_array, pieces_counts)).reshape(-1))

        board.pop()

    # One batched prediction, flattened to shape (n_moves,). Keeping the
    # per-move results as (1,)-shaped arrays made the score matrix (n_moves, 1),
    # on which argsort returns all zeros, so the first legal move always won.
    predictions = np.ravel(model.predict(np.vstack(feature_rows)))
    evals = predictions + np.array(capture_bonus)

    # Ascending order puts the lowest evaluation first, which is what Black
    # wants; White needs the highest evaluation first, so reverse it.
    order = np.argsort(evals)
    if white_to_move:
        order = order[::-1]
    ranked_moves = [candidate_moves[i] for i in order]

    if not stochastic: # if not using stochastic mode return best move
        rank = 0
    else:
        sample = np.random.random_sample()

        if sample <= 0.65 or move_number > 7: # 65% chance for best move
            rank = 0
        elif sample <= 0.85 or move_number > 5: # 20% chance for second-best move
            rank = 1
        elif sample <= 0.975 or move_number > 3: # 12.5% chance for third-best move
            rank = 2
        else: # 2.5% chance for fourth-best move
            rank = 3

    # Positions with fewer legal moves than the drawn rank fall back to the worst available move.
    rank = min(rank, len(ranked_moves) - 1)

    return np.array([ranked_moves[rank]], dtype=object)

In [None]:
# New board for the game loop below (chess.Board() with no arguments).
board = chess.Board()

In [None]:
import time

# Let the loaded model play both sides from the current `board`, one move per
# second, until the game is over or the cell is interrupted by hand.
try:
    while not board.is_game_over():
        time.sleep(1)  # pace the printed output
        chosen = predict_DT(model_loaded, board.fen())
        # predict_DT may hand back a bare chess.Move or a one-element array wrapping it.
        move = chosen if isinstance(chosen, chess.Move) else chosen[0]
        print(move)
        board.push(move)
        print(board)#.unicode()
    print(board.result())
except KeyboardInterrupt:
    # Stopping the cell by hand is the expected way to end a game early; any
    # other error is left to surface instead of being silently swallowed.
    pass