In [None]:
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import chess

import glob

In [None]:
import chess_SL_DT_lib as libDT

In [None]:
path = "../Data/DataTrain"

letters = 'abcdefghi'

# Collect every file under `path` whose name starts with Chess_Jan_<letter>,
# visiting the letters in order so the resulting list is grouped by letter.
csv_files = [
    csv_path
    for let in letters
    for csv_path in glob.glob(f'{path}/Chess_Jan_{let}*')
]

In [None]:
def _read_positions_csv(csv_path):
    """Read one training CSV, keeping only the columns selected by `usecols`."""
    return pd.read_csv(
        csv_path,
        usecols=['board', 'cp', 'white_active'],# 'white_elo', 'black_elo'],
        # 'cp' is read as object and converted to numbers later in the notebook.
        dtype={'white_elo': 'uint16', 'black_elo': 'uint16', 'white_active': 'bool', 'cp': 'object'},
    )


# Files are read lazily one after another and stacked into a single frame
# with a fresh 0..n-1 index.
df_large = pd.concat(map(_read_positions_csv, csv_files), ignore_index=True)

In [None]:
# Feature variables and regression target
white_active = df_large['white_active']
cp = pd.to_numeric(df_large['cp'], errors='coerce')

# Target: vectorised form of the per-row clamp.
#   * Entries that could not be parsed as numbers are NaN after coercion
#     (presumably mate scores -- TODO confirm against the data export); they
#     become +10 when white is to move and -10 otherwise.
#   * Every numeric score is clipped to the range [-9, 9].
cp_raw = cp.to_numpy(dtype='float64')
white_to_move = white_active.to_numpy(dtype=bool)
cp = np.where(
    np.isnan(cp_raw),
    np.where(white_to_move, 10.0, -10.0),
    np.clip(cp_raw, -9.0, 9.0),
).astype('float32')

# Features: for each position, the 1-D board encoding followed by the piece
# counts, both produced by the libDT helpers from the FEN string.
X = []

for fen_str in df_large['board']:
    piece_counts = libDT.get_number_of_pieces(fen_str)
    X.append(np.concatenate((libDT.fen_str_to_1d_array(fen_str), piece_counts)))

X = np.array(X, dtype='float32')

In [None]:
# Drop the reference to the raw frame; X and cp were already built from it.
del df_large

In [None]:
import joblib

# Persist the feature matrix and the targets so later sessions can skip the
# CSV parsing; both files use the same compression level.
DATASET_COMPRESSION = 6

joblib.dump(X, 'data_X.joblib', compress=DATASET_COMPRESSION)
joblib.dump(cp, 'data_y.joblib', compress=DATASET_COMPRESSION)

In [None]:
# Read the cached features and targets back from disk, in that order.
X_loaded, y_loaded = (
    joblib.load(cache_name) for cache_name in ('data_X.joblib', 'data_y.joblib')
)

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_loaded, y_loaded, test_size = 0.2, random_state = 0)

In [None]:
# Hyper-parameters gathered in one mapping so they can be inspected or logged
# separately from the estimator.
rf_settings = {
    'n_estimators': 25,
    'random_state': 0,
    'criterion': 'absolute_error',  # L1 loss, chosen for simplification
    'verbose': 1,
    # 'n_jobs': 2,  # enable to train the trees in parallel
    'max_features': 30,  # original note: 74 features, rule of thumb is ~1/3 for regression
}

model = RandomForestRegressor(**rf_settings)
model.fit(X_train, y_train)

In [None]:
# Version tag supplied by libDT; it is embedded in the saved model's filename further down.
version = libDT.get_current_version()
version

In [None]:
import joblib

# The fitted forest is written under a version-stamped name, using the
# strongest compression level.
MODEL_COMPRESSION = 9
filename = f'model_RF_{version}.joblib'

joblib.dump(model, filename, compress=MODEL_COMPRESSION)

In [None]:
# Read the serialized model back from `filename`.
model_loaded = joblib.load(filename)

In [None]:
# Predict on the held-out split.
# NOTE(review): this uses the in-memory `model`, not `model_loaded`; confirm which one is meant to be evaluated.
y_pred = model.predict(X_test)

In [None]:
# Mean absolute error on the held-out split: total absolute deviation divided by the sample count.
np.absolute(y_pred - y_test).sum() / len(y_test)

In [None]:
def predict_DT(model, fen, move_number=7, stochastic=True):
    """Choose a move for the side to move in `fen` using `model`'s evaluations.

    Every legal move is played on a scratch board, the resulting position is
    encoded with the libDT helpers (1-D board encoding followed by the piece
    counts) and scored by `model`. Scores are read from white's point of
    view: white picks the highest score, black the lowest.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(X)`` that returns one evaluation per row
        of ``X`` (shape ``(n,)`` or ``(n, 1)``).
    fen : str
        Position to move from, passed to ``chess.Board``.
    move_number : int, default 7
        Caps the randomness of the stochastic mode: above 7 the best move is
        always played; above 5 at most the second-best; above 3 at most the
        third-best.
    stochastic : bool, default True
        If False, always return the best-ranked move. If True, draw one
        uniform sample and play the best move with probability 0.65, the
        second-best with 0.20, the third-best with 0.125 and the fourth-best
        with 0.025, subject to the `move_number` caps above. When fewer
        moves exist than the drawn rank, the lowest-ranked available move is
        returned instead.

    Returns
    -------
    chess.Move
        The selected move. A move that delivers checkmate is returned
        immediately, without consulting the model.

    Raises
    ------
    IndexError
        If the position has no legal moves.
    ValueError
        If `model` does not return exactly one evaluation per legal move.
    """
    board = chess.Board(fen)
    legal_moves_list = list(board.legal_moves)
    if not legal_moves_list:
        raise IndexError('predict_DT: the position has no legal moves')

    white_to_move = board.turn
    # Flat nudge in favour of captures, signed for the side that is moving.
    # Modify to add piece value eventually.
    capture_bonus = 0.5 if white_to_move else -0.5

    feature_rows = []
    bonuses = []
    for move in legal_moves_list:
        is_capture = board.is_capture(move)

        board.push(move)
        if board.is_checkmate():
            return move  # Always make a move which gives checkmate if possible.
        next_fen = board.fen()
        board.pop()

        feature_rows.append(
            np.concatenate((
                libDT.fen_str_to_1d_array(next_fen),
                libDT.get_number_of_pieces(next_fen),
            ))
        )
        bonuses.append(capture_bonus if is_capture else 0.0)

    # One batched prediction, flattened to 1-D. Sorting an (n, 1) column
    # along its last axis would yield all-zero indices and always select the
    # first legal move.
    evals = np.asarray(model.predict(np.stack(feature_rows)), dtype='float64').reshape(-1)
    if evals.shape[0] != len(legal_moves_list):
        raise ValueError(
            'predict_DT: model.predict must return exactly one evaluation per legal move'
        )
    evals = evals + np.asarray(bonuses, dtype='float64')

    # Ascending order puts black's preferred (lowest) score first; white
    # wants the highest score first, so the order is reversed.
    order = np.argsort(evals)
    if white_to_move:
        order = order[::-1]
    ranked_moves = [legal_moves_list[idx] for idx in order]

    if not stochastic:  # deterministic mode: best move only
        return ranked_moves[0]

    sample = np.random.random_sample()

    if sample <= 0.65 or move_number > 7:  # 65% chance for best move
        rank = 0
    elif sample <= 0.85 or move_number > 5:  # 20% chance for second-best move
        rank = 1
    elif sample <= 0.975 or move_number > 3:  # 12.5% chance for third-best move
        rank = 2
    else:  # 2.5% chance for fourth-best move
        rank = 3

    # Positions with fewer legal moves than the drawn rank fall back to the
    # lowest-ranked move that exists instead of indexing out of range.
    return ranked_moves[min(rank, len(ranked_moves) - 1)]