In [1]:
import glob
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [2]:
# Directory that holds the training CSV files.
path = "../Data/DataTrain"

# One pattern per letter: files are matched as Chess_Jan_<letter>*.
letters = 'abcdefghij'

# glob returns matches in arbitrary, filesystem-dependent order.  Sorting the
# matches for each letter keeps the letter-by-letter grouping while making the
# row order of the concatenated frame -- and therefore the seeded train/test
# split further down -- reproducible across machines.
csv_files = []

for let in letters:
    csv_files.extend(sorted(glob.glob(f'{path}/Chess_Jan_{let}*')))



In [3]:
# Read every matched CSV, keeping only the columns used downstream, and stack
# the pieces into a single frame with a fresh 0..n-1 index.  `cp` is loaded as
# raw text (object); it is converted to numbers in a later cell.
df_large = pd.concat(
    [
        pd.read_csv(
            csv_path,
            usecols=['board', 'cp', 'white_active', 'white_elo', 'black_elo'],
            dtype={
                'white_elo': 'uint16',
                'black_elo': 'uint16',
                'white_active': 'bool',
                'cp': 'object',
            },
        )
        for csv_path in csv_files
    ],
    ignore_index=True,
)

In [4]:
# Signed piece codes: white pieces are positive, black pieces negative; empty
# squares are encoded as 0.  Built once here instead of on every call, since
# the converter is invoked once per row of the dataset.
_PIECE_TO_INT = {
    'P': 1, 'N': 2, 'B': 3, 'R': 4, 'Q': 5, 'K': 6,
    'p': -1, 'n': -2, 'b': -3, 'r': -4, 'q': -5, 'k': -6,
}


def fen_str_to_1d_array(fen):
    """
    Converts a FEN string representation of a chess board to a flat 1-d array.

    Only the piece-placement field (everything before the first space) is
    used; side to move, castling rights, etc. are ignored.  Squares are emitted
    in FEN order: rank 8 first, files a..h within each rank.

    Args:
        fen (str): The FEN string representing the chess board.  A bare
            piece-placement field without the trailing fields is accepted too.

    Returns:
        np.ndarray: float32 array of shape (64,).  White pieces are
        P=1, N=2, B=3, R=4, Q=5, K=6, black pieces are the negatives,
        and empty squares are 0.

    Raises:
        KeyError: If the board field contains a character that is neither a
            digit, a '/' separator, nor a known piece letter.
        ValueError: If the board field does not describe exactly 64 squares.

    Example:
        >>> fen_str_to_1d_array('rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1')[:8]
        array([-4., -2., -3., -5., -6., -3., -2., -4.], dtype=float32)
    """
    # Only the board position (the first space-separated field) is processed.
    ranks = fen.split(' ')[0].split('/')

    board = []
    for rank in ranks:
        for char in rank:
            if char.isdigit():
                # A digit is a run of that many empty squares.
                board.extend([0] * int(char))
            else:
                # Anything else must be a piece letter.
                board.append(_PIECE_TO_INT[char])

    # Fail loudly on malformed boards rather than returning a vector of the
    # wrong length that would only break later when rows are stacked.
    if len(board) != 64:
        raise ValueError(
            f"FEN board field must describe 64 squares, got {len(board)}: {fen!r}"
        )

    return np.array(board, dtype='float32')

In [5]:
# Build the regression target `cp` and the feature matrix `X` from df_large.

white_active = df_large['white_active']

# `cp` was loaded as raw text; anything that does not parse as a number
# (including missing values) becomes NaN here.
cp_raw = pd.to_numeric(df_large['cp'], errors='coerce')

# Target encoding, vectorised (same rule the per-row loop used to apply):
#   * no numeric evaluation -> +10 when white is to move, -10 when black is
#   * otherwise             -> evaluation clamped to [-9, 9]
# NOTE(review): the sign of the +/-10 sentinel is taken from the side to move,
# not from which side is actually ahead -- confirm this is the intended label
# for positions that have no evaluation.
missing_eval = cp_raw.isna().to_numpy()
sentinel = np.where(white_active.to_numpy(), 10.0, -10.0)
clamped = cp_raw.clip(lower=-9, upper=9).to_numpy()
cp = np.where(missing_eval, sentinel, clamped).astype('float32')

# Fill a preallocated (n_rows, 64) matrix row by row.  This avoids holding
# millions of tiny intermediate arrays in a Python list and then copying them
# all again into the final array.
boards = df_large['board']
X = np.empty((len(boards), 64), dtype='float32')
for row, fen in enumerate(boards):
    X[row] = fen_str_to_1d_array(fen)

In [6]:
# Preview the combined frame (the notebook shows only its first and last rows).
df_large

Unnamed: 0,white_elo,black_elo,cp,white_active,board
0,1702,1628,0.1,True,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...
1,1702,1628,0.12,False,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
2,1702,1628,0.37,True,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...
3,1702,1628,0.23,False,rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQK...
4,1702,1628,0.15,True,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNB...
...,...,...,...,...,...
25999995,1104,1213,,False,1kr3nr/p4p2/3p2pp/4p3/4P1PP/1p6/2p5/2K5 b - - ...
25999996,1104,1213,,True,1kr3nr/p4p2/3p2pp/4p3/4P1PP/8/1pp5/2K5 w - - 0 37
25999997,1104,1213,,False,1kr3nr/p4p2/3p2pp/4p3/4P1PP/8/1Kp5/8 b - - 0 37
25999998,1104,1213,,True,1kr3nr/p4p2/3p2pp/4p3/4P1PP/8/1K6/2q5 w - - 0 38


In [7]:
# `cp` was already encoded by the preprocessing cell above: rows without an
# evaluation became +/-10 and every other value was clamped to [-9, 9].
# Re-running the clamp on that result would see the +/-10 sentinels as ordinary
# out-of-range values and collapse them to +/-9, erasing the distinction the
# earlier cell created.  So this cell only validates `cp` and never modifies it.
assert not np.isnan(cp).any(), "cp still contains NaN after preprocessing"
assert np.all(np.abs(cp) <= 10), "cp contains values outside [-10, 10]"

In [8]:
# Hold out 20% of the positions for evaluation; random_state pins the shuffle.
# NOTE(review): rows are split individually, so consecutive positions that
# presumably come from the same game can land on both sides of the split --
# confirm whether a per-game split is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, cp, test_size=0.2, random_state=42)

In [9]:
# Sanity check: (training rows, 64 board squares).
X_train.shape

(20800000, 64)

In [10]:
# Sanity check: one target value per training row.
y_train.shape

(20800000,)

In [11]:
# Fit a 50-tree random forest on the flat board vectors.
# n_jobs=-1 builds the trees on all available cores instead of one at a time,
# which matters at this training-set size.  The per-tree seeds are derived
# from random_state, so the fit stays reproducible.
model = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

In [None]:
# Persist the fitted forest so it can be reloaded without retraining.
# NOTE: unpickling runs arbitrary code -- only load this file from a trusted source.
filename = 'model_RF_1.pkl'

# The context manager guarantees the file is flushed and closed, even if
# pickle.dump raises part-way through.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Predict the clamped evaluation for every held-out position.
y_pred = model.predict(X_test)

In [None]:
# Mean absolute error on the held-out set: total absolute residual divided by
# the number of test rows.
abs_errors = np.abs(y_pred - y_test)
abs_errors.sum() / len(y_test)

In [None]:
from tqdm import tqdm

# NOTE(review): placeholder progress-bar loop -- it iterates 10 times and does
# no work.  Looks like leftover scaffolding; fill it in or remove the cell.
for i in tqdm(range(10)):
    # Intentionally empty: nothing is done per iteration.
    pass
