# CS 345 Project
#### _Ben Spencer and Matteo Selvaggio_

#### **Part 0: Importing Data and Libraries**

In [4]:
# installing zstandard package to unpack .zst compressed files
# %pip install zstandard
import zstandard as zstd

# installing chess package which contains useful methods for processing chess boards.
# %pip install chess
import chess

# importing numpy and matplotlib
import numpy as np
import matplotlib.pyplot as plt

# importing sklearn methods
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# importing keras with GPU
# If using Anaconda, use the following commands in a local conda terminal:
# %pip install --upgrade pip
# %conda create -n tf tensorflow
# %conda activate tf
# %pip install tensorflow[and-cuda]
import tensorflow as tf

# import data
# Column 1 of the Lichess puzzle CSV holds the FEN string and column 2 holds the
# space-separated move list; no other columns are read.
# skiprows skips the leading lines of the file so that only the trailing rows are
# loaded (10,000 puzzles for the database snapshot this notebook was run against).
PUZZLE_CSV = "lichess_db_puzzle.csv"
PUZZLE_SKIP_ROWS = 3822157

# Parse the file once and split the two columns, instead of reading it twice.
# unpack=True transposes the (rows, 2) result so it can be unpacked per column.
fen_data, move_list = np.loadtxt(
    PUZZLE_CSV,
    delimiter=',',
    dtype=str,
    skiprows=PUZZLE_SKIP_ROWS,
    usecols=(1, 2),
    unpack=True,
)
print(f'Shape of the features array (X): {fen_data.shape}')
print(f'Head of the features array: {fen_data[:5]}\n')
print(f'Shape of the labels array (Y): {move_list.shape}')
print(f'Head of the labels array: {move_list[:10]}')


Shape of the features array (X): (10000,)
Head of the features array: ['2r3k1/5ppp/3Qb3/p3p3/4P3/5B2/PrP2PPP/2qRK2R b K - 2 24'
 '4r1k1/2p1qpbp/1p4p1/p3p2n/4b3/1PP1PN1P/1P2BPPB/2QR2K1 w - - 2 21'
 '8/8/3k4/2p2p1p/p1P4P/P3Pp2/4K3/8 w - - 0 47'
 'r1b1r1k1/6pp/p1p5/3p4/2qB4/2P2QP1/P1P4P/5RK1 b - - 1 19'
 '8/3P2k1/5bp1/4N3/1p1K4/1P3R2/4r3/8 b - - 0 42']

Shape of the labels array (Y): (10000,)
Head of the labels array: ['c1c2 d6d8 c8d8 d1d8' 'e2d3 e4d3 d1d3 e5e4 d3d2 e4f3'
 'e2f3 d6e5 f3f2 e5e4 f2e2 f5f4 e3f4 e4f4' 'c8h3 f3f7 g8h8 f7g7'
 'e2e5 f3f6 g7f6 d7d8q' 'f6f8 e4g2' 'd8d5 f5e6 d5e6 b3e6' 'e4d5 e1e8'
 'd6c5 g7c7 c5d5 c7c1' 'd1d3 e2e1 b1e1 e6e1']


#### **Part 1: Organizing the Data**

In [41]:
# NOTE: the string below is reference-only scratch code; it is never executed.
# As written it would not run against this notebook's data: fen_data is loaded as a
# 1-D array (a single usecols column), so the two-index lookup fen_data[i,1] would
# raise an IndexError.
'''
Contains some example code that will be helpful for organizing data.

board_matrix = []

for i in range(10000):
    board_matrix.append(str(chess.Board(fen_data[i,1])))

label = []

for i in range(10000):
    label.append(move_list[i][2:4])

print(label[:5])
'''
# Number of puzzle rows loaded (the length of move_list).
data_size = len(move_list)
# Number of squares on a chess board (8 x 8).
board_size = 64

def convert_board_to_str(fen):
    """Return the text rendering of the position described by a FEN string.

    The FEN is parsed with chess.Board and the board object is converted with
    str(); the resulting text is what convert_str_to_vector consumes.

    Args:
        fen: A FEN position string.

    Returns:
        The str() form of the parsed chess.Board.
    """
    return str(chess.Board(fen))

'''
The following function converts characters in the board position to vectors of size 12 to indicate what kind of
piece is in that space. Here is the template:

No piece:               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Black Pawn (p):         [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Black Knight (n):       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Black Bishop (b):       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Black Rook (r):         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
Black Queen (q):        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
Black King (k):         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

White Pawn (P):         [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
White Knight (N):       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
White Bishop (B):       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]
White Rook (R):         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
White Queen (Q):        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
White King (K):         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
'''
# Channel index of each piece symbol inside the 12-element one-hot vector:
# black p, n, b, r, q, k occupy channels 0-5 and white P, N, B, R, Q, K channels 6-11.
_PIECE_CHANNEL = {symbol: channel for channel, symbol in enumerate('pnbrqkPNBRQK')}
_CHANNEL_COUNT = len(_PIECE_CHANNEL)


def convert_str_to_vector(board_data):
    """One-hot encode the squares of a text board.

    Spaces and newlines are treated as separators and skipped. A '.' becomes an
    all-zero vector (empty square); each piece letter becomes a 12-element
    vector with a single 1 in that piece's channel.

    Args:
        board_data: Text board, read one character at a time.

    Returns:
        A list with one 12-element list of ints per encoded square, in the
        order the characters appear. If an unrecognised character is met, an
        error line is printed and the squares encoded up to that point are
        returned (the remainder of the input is not processed).
    """
    encoded = []

    for symbol in board_data:
        if symbol in (' ', '\n'):
            # Layout characters only; they do not correspond to a square.
            continue

        if symbol == '.':
            encoded.append([0] * _CHANNEL_COUNT)
            continue

        channel = _PIECE_CHANNEL.get(symbol)
        if channel is None:
            # For debugging
            print(f'ERROR: Board Contains Unnaccepted Character: {symbol}')
            break

        # Build a fresh list per square so rows never share storage.
        one_hot = [0] * _CHANNEL_COUNT
        one_hot[channel] = 1
        encoded.append(one_hot)

    return encoded

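'''
The following functions convert a square name in algebraic notation (e.g., a4) to a
numerical value to be used by a classifier.

In chess terminology a file is a vertical column (letters a-h) and a rank is a
horizontal row (digits 1-8). Note that the helper names below are swapped relative to
that terminology: rank() takes the letter and file() takes the digit.

Squares are numbered 0-63 column by column: value = 8 * (letter index) + (digit - 1),
so the value increases up each column from row 1 to row 8, moving from column a to
column h.

For example, a1 is assigned the value '0', a2 the value '1', b1 the value '8', and h8
the value '63'.
'''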
def rank(char):
    """Return the base offset (a multiple of 8) for a square's column letter.

    Despite its name, this helper takes the letter part of a square name
    ('a' through 'h'), not the digit.

    Args:
        char: One of the lowercase letters 'a'..'h'.

    Returns:
        0 for 'a', 8 for 'b', and so on up to 56 for 'h'.

    Raises:
        ValueError: If char is not one of 'a'..'h'. ValueError is a subclass
            of Exception, so handlers written for a generic Exception still
            catch it.
    """
    for index, letter in enumerate('abcdefgh'):
        if char == letter:
            return 8 * index

    raise ValueError(f'ERROR: Unsupported rank character: {char}')
    
def file(char):
    """Return the zero-based offset for a square's row digit.

    Despite its name, this helper takes the digit part of a square name
    ('1' through '8'), not the letter.

    Args:
        char: A value that int() parses to a whole number from 1 to 8.

    Returns:
        The parsed number minus one (0 for '1' up to 7 for '8').

    Raises:
        ValueError: If char cannot be parsed as an integer or the parsed
            value is outside 1..8. ValueError is a subclass of Exception, so
            handlers written for a generic Exception still catch it.
    """
    try:
        # Parse once; non-numeric text is reported with the same message as
        # an out-of-range digit instead of int()'s own "invalid literal" error.
        number = int(char)
    except ValueError:
        number = None

    if number is not None and 1 <= number <= 8:
        return number - 1

    raise ValueError(f'ERROR: Unsupported file character: {char}')

def convert_pos_to_val(pos):
    """Map a square name such as 'c2' to its integer label.

    The first character is passed to rank() and the second to file(); the
    label is the sum of the two results. Characters after the second are
    ignored.

    Args:
        pos: A sequence whose first two items are the square's letter and digit.

    Returns:
        rank(pos[0]) + file(pos[1]).
    """
    return rank(pos[0]) + file(pos[1])


63


In [45]:
# Converting FEN data to serialized vector data:
# each FEN is rendered as a text board and then one-hot encoded, giving one
# 12-element vector per square for every puzzle.
X_list = [
    convert_str_to_vector(convert_board_to_str(fen))
    for fen in fen_data
]
X = np.array(X_list)

# Converting move list to two label vectors for each model.
# Only the first move of each puzzle is used: characters 0-1 name the square the
# piece moves from and characters 2-3 the square it moves to (e.g. 'c1c2').
y_move_from = np.array([convert_pos_to_val(moves[:2]) for moves in move_list])
y_move_to = np.array([convert_pos_to_val(moves[2:4]) for moves in move_list])

[17 26 42 ... 27 44 35]
