In [17]:
first_dataset_path = '../datasets/dataset 1'
second_dataset_path = '../datasets/dataset 2'
piece_images_path = 'processed_data'

In [9]:
import os
import json
import numpy as np
import cv2 as cv
from tensorflow.keras.preprocessing.image import ImageDataGenerator

os.listdir(second_dataset_path)

['test', 'train', 'val']

In [None]:
shuffle_data = True

# First dataset

In [3]:
# data proportions
TRAIN_DATA = 0.7
TEST_DATA = 0.15
VAL_DATA = 0.15

# sizes used in training
IMAGE_SIZE = 128
BATCH_SIZE = 64

# factor of increasing the cropped version of images
IMAGE_GROWTH_FACTOR = 0.1

In [10]:
# loading config variables: types of pieces and cell coordinates
dataset_path = first_dataset_path + '/data'
config = json.load(open(dataset_path + '/config.json', "r"))
piecesTypes = config['piecesTypes']
cellsCoordinates = config['cellsCoordinates']
piecesTypes.append('empty')

In [5]:
minCoords = np.array([np.inf, np.inf])
maxCoords = np.array([-np.inf, -np.inf])
for cell in cellsCoordinates:
    minCoords = np.minimum(minCoords, cellsCoordinates[cell])
    maxCoords = np.maximum(maxCoords, cellsCoordinates[cell])
boardSize = (maxCoords - minCoords + np.array([1, 1])).astype(int)
marginSize = np.array([1, 1]) * IMAGE_SIZE * IMAGE_GROWTH_FACTOR
cellSize = (IMAGE_SIZE - marginSize) / boardSize
relativeCellSize = (1 - IMAGE_GROWTH_FACTOR) / boardSize

In [6]:
cellsCoordinates

{'A1': [0, 0],
 'A2': [0, 1],
 'A3': [0, 2],
 'A4': [0, 3],
 'A5': [0, 4],
 'A6': [0, 5],
 'A7': [0, 6],
 'A8': [0, 7],
 'B1': [1, 0],
 'B2': [1, 1],
 'B3': [1, 2],
 'B4': [1, 3],
 'B5': [1, 4],
 'B6': [1, 5],
 'B7': [1, 6],
 'B8': [1, 7],
 'C1': [2, 0],
 'C2': [2, 1],
 'C3': [2, 2],
 'C4': [2, 3],
 'C5': [2, 4],
 'C6': [2, 5],
 'C7': [2, 6],
 'C8': [2, 7],
 'D1': [3, 0],
 'D2': [3, 1],
 'D3': [3, 2],
 'D4': [3, 3],
 'D5': [3, 4],
 'D6': [3, 5],
 'D7': [3, 6],
 'D8': [3, 7],
 'E1': [4, 0],
 'E2': [4, 1],
 'E3': [4, 2],
 'E4': [4, 3],
 'E5': [4, 4],
 'E6': [4, 5],
 'E7': [4, 6],
 'E8': [4, 7],
 'F1': [5, 0],
 'F2': [5, 1],
 'F3': [5, 2],
 'F4': [5, 3],
 'F5': [5, 4],
 'F6': [5, 5],
 'F7': [5, 6],
 'F8': [5, 7],
 'G1': [6, 0],
 'G2': [6, 1],
 'G3': [6, 2],
 'G4': [6, 3],
 'G5': [6, 4],
 'G6': [6, 5],
 'G7': [6, 6],
 'G8': [6, 7],
 'H1': [7, 0],
 'H2': [7, 1],
 'H3': [7, 2],
 'H4': [7, 3],
 'H5': [7, 4],
 'H6': [7, 5],
 'H7': [7, 6],
 'H8': [7, 7]}

In [11]:
piecesTypes

['bishop_b',
 'bishop_w',
 'king_b',
 'king_w',
 'knight_b',
 'knight_w',
 'pawn_b',
 'pawn_w',
 'queen_b',
 'queen_w',
 'rook_b',
 'rook_w',
 'empty']

In [None]:
def create_labels_per_board(file_nr):
    json_file = json.load(open(str(file_nr) + '.json'))
    json_board = {}
    X_train, Y_train = [], []
    all_spaces = cellsCoordinates.keys()
    for space, piece in json_file['config']:
        if space not in all_spaces:
            json_board[space] = 'empty'
        else:
            json_board[space] = piece
    small_img = [filename for filename in os.listdir(piece_images_path) if filename.startswith(str(file_nr))]
    for space, filename in zip(all_spaces, small_img):
        X_train.append(cv.imread(filename))
        Y_train.append(space)
    return X_train, Y_train

In [15]:
def create_first_data_flow():
    train_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        piece_images_path,
        batch_size=BATCH_SIZE,
        target_size=(280, 280),
        shuffle=True,
        classes=piecesTypes,
        class_mode='categorical'
    )
    valid_generator = ImageDataGenerator(rescale=1/255).flow_from_directory(
        piece_images_path,
        batch_size=BATCH_SIZE,
        target_size=(280, 280),
        class_mode='categorical'
    )
    return (train_generator, valid_generator)

In [16]:
create_first_data_flow()

Found 0 images belonging to 13 classes.
Found 0 images belonging to 0 classes.


(<keras.src.legacy.preprocessing.image.DirectoryIterator at 0x21a02e5c070>,
 <keras.src.legacy.preprocessing.image.DirectoryIterator at 0x21a02e08a30>)

# Second dataset

In [30]:
# loading datasets
train_data_path_second = second_dataset_path + '/train'
test_data_path_second = second_dataset_path + '/test'
val_data_path_second = second_dataset_path + '/val'