# Training the optimal model

In [1]:
# Loading packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow
import keras
from keras.models import load_model
import os
import sys

In [2]:
# Importing local modules: make the directory two levels above the current
# working directory importable so the `utils` package imported below is found.
cwd = os.getcwd()
parent_directory = os.path.abspath(os.path.join(cwd, "..", ".."))
# Guard the append so re-running this cell does not pile up duplicate entries.
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

from utils.move_encoding import encode_move, decode_move
from utils.board_encoding import encode_board, fen_to_board

In [3]:
# Improving GPU performance: open a TF1-compat session whose GPU options have
# `allow_growth` enabled, so GPU memory is claimed on demand rather than up front.
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

config = ConfigProto()
config.gpu_options.allow_growth = True

# The session is kept in a module-level name so it stays alive for the notebook.
session = InteractiveSession(config=config)

### Loading the dataset

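There are two options (the cells below use the second one, combining the partial `.npy` files found in the prepared-data folder):
1. Load one dataframe containing the full dataset
2. Create the dataframe from a folder that contains a set of partial dataframes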

In [4]:
# Locating the prepared data files
file_location = 'data/stockfish_generated_data/prepared_data'
file_path = os.path.join(parent_directory, file_location)

files = os.listdir(file_path)

# Only the .npy arrays are data files: half are moves, other half are positions.
# os.listdir also returns unrelated entries (e.g. .ipynb_checkpoints, .DS_Store),
# so filter before counting to avoid inflating the number of pairs.
npy_files = [name for name in files if name.endswith(".npy")]
numOfEach = len(npy_files) // 2


In [5]:
# Load every (moves{i}.npy, positions{i}.npy) pair from `file_path` and stack them
# into two index-aligned arrays (they are later split together as labels/features).
FRACTION_OF_DATA = 1  # share of the loaded samples to keep (1 keeps everything)

loaded_moves = []
loaded_boards = []

for i in range(numOfEach):
    # Reuse the directory resolved in the previous cell instead of repeating the
    # relative path, so counting and loading always look at the same folder.
    moves_file = os.path.join(file_path, f"moves{i}.npy")
    boards_file = os.path.join(file_path, f"positions{i}.npy")

    try:
        # NOTE(review): allow_pickle=True unpickles arbitrary objects, which can run
        # code on load; only use it on files from a trusted source.
        moves = np.load(moves_file, allow_pickle=True)
        boards = np.load(boards_file, allow_pickle=True)
    except Exception as err:
        # Best effort: skip a pair that cannot be read, but say why. Catching
        # Exception (not a bare except) lets KeyboardInterrupt stop a long load.
        print("error: could not load ", i, ", but is still going:", repr(err))
        continue

    if len(moves) != len(boards):
        # Appending a mismatched pair would shift every later board against its
        # move, so report the pair and leave it out.
        print("ERROR ON i = ", i, len(moves), len(boards))
        continue

    loaded_moves.extend(moves)
    loaded_boards.extend(boards)

# Both lists have the same length here, so one cut-off keeps them aligned.
keep = int(len(loaded_moves) * FRACTION_OF_DATA)
allMoves = np.array(loaded_moves)[:keep]
allBoards = np.array(loaded_boards)[:keep]

assert len(allMoves) == len(allBoards), "MUST BE OF SAME LENGTH"

In [None]:
# Cast both the encoded boards and the move labels to 32-bit floats.
allBoards, allMoves = (arr.astype("float32") for arr in (allBoards, allMoves))

# Sanity check: shape of a single encoded board, then of the whole label array.
print(allBoards[0].shape)
print(allMoves.shape)

(8, 8, 14)


In [None]:
# Fraction of the samples held out of training.
test_size = 0.2

# Split boards (features) and moves (labels) together so they stay paired;
# the fixed random_state makes the split repeatable.
split_parts = train_test_split(
    allBoards,
    allMoves,
    test_size=test_size,
    random_state=42,
)
x_train, x_val, y_train, y_val = split_parts

In [None]:
encoded_board_length = 8 * 8 * 14  # number of values in one flattened (8, 8, 14) board
encoded_moves_length = 4672  # width of the output layer: one unit per move class

# Defining, compiling, and fitting the optimal model:
# a fully connected classifier that flattens an encoded board, passes it through
# four ReLU layers of decreasing width, and ends in a softmax over move classes.
model = keras.Sequential(
    [
        keras.layers.Flatten(input_shape=(8, 8, 14)),
        keras.layers.Dense(units=1500, activation="relu"),
        keras.layers.Dense(units=1000, activation="relu"),
        keras.layers.Dense(units=750, activation="relu"),
        keras.layers.Dense(units=500, activation="relu"),
        keras.layers.Dense(units=encoded_moves_length, activation="softmax"),
    ]
)

model.compile(
    optimizer="Adam",
    loss="SparseCategoricalCrossentropy",  # Because y is an integer representation
    metrics=["accuracy"],
)

# Validate on the hold-out split produced by train_test_split. The previous
# validation_split=0.2 carved a second validation set out of x_train, which left
# x_val / y_val unused and shrank the data the model actually trained on.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=2,
    validation_data=(x_val, y_val),
)

Epoch 1/2
Epoch 2/2


In [None]:
# Write the fitted model to disk as an .h5 file (path is relative to the working directory).
model.save(filepath="../saved_models/01_initial_model.h5")

### Training the optimal model

The current optimal model consists of:
- 1 flatten input layer
- 4 dense hidden layers, using the ReLU activation function, with the number of neurons decreasing from 1500 to 500 (1500, 1000, 750, 500)
- 1 dense output layer, using the Softmax activation function