In [2]:
import os
import chess
import chess.pgn
import chess.engine
import numpy as np
import time
import h5py
import sys
# Extra directory appended to the module search path before the project-local
# import that follows. An empty string entry in sys.path refers to the current
# working directory.
# NOTE(review): presumably this should point at the folder that holds
# format_bitmap_plus.py when the notebook is not run from that folder — confirm.
WORKSPACE_PATH =""

sys.path.append(WORKSPACE_PATH)
import format_bitmap_plus as fm

In [3]:
# Directory that is walked for PGN input files. The two generated HDF5 files
# are stored inside this same directory.
PGNS_PATH = '../data/Fichier_pgn/'
BITMAPS_PATH = os.path.join(PGNS_PATH, 'bitmaps.hdf5')
LABELS_PATH = os.path.join(PGNS_PATH, 'labels.hdf5')

In [4]:
# Games whose PlyCount header is below this value are rejected by select().
MIN_PLYS = 10
# Width of one stored bitmap row (second dimension of the bitmaps dataset).
# NOTE(review): presumably the length of the vector returned by
# fm.get_bitmap_plus — confirm against that module.
BITMAP_SIZE = 773

In [11]:
def select(game, min_plys=None):
    """Decide whether a game is a decisive win by the clearly stronger player.

    A game is accepted only when all of the following hold:

    * the ``WhiteElo``, ``BlackElo``, ``PlyCount`` and ``Result`` headers are
      present and the first three parse as integers;
    * the game lasted at least ``min_plys`` plies;
    * the result is ``'1-0'`` or ``'0-1'`` and the winner is the higher-rated
      player;
    * the rating gap reaches the minimum required for the winner's rating
      band: 100 from 2500 up, 150 from 2300, 200 from 2100, 250 from 1900,
      and 300 below 1900.

    Args:
        game: object exposing a ``headers`` mapping with the keys listed above.
        min_plys: minimum ply count; defaults to the module-level ``MIN_PLYS``
            when omitted.

    Returns:
        bool: ``True`` if the game should be kept, ``False`` otherwise
        (including when a header is missing or malformed).
    """
    if min_plys is None:
        min_plys = MIN_PLYS

    # Only header lookup/parsing problems mean "skip this game". A bare
    # ``except`` would also swallow KeyboardInterrupt and SystemExit.
    try:
        white_elo = int(game.headers['WhiteElo'])
        black_elo = int(game.headers['BlackElo'])
        nb_plys = int(game.headers['PlyCount'])
        result = game.headers['Result']
    except (KeyError, ValueError, TypeError):
        return False

    if nb_plys < min_plys:
        return False

    if result == '1-0':
        winner_elo, loser_elo = white_elo, black_elo
    elif result == '0-1':
        winner_elo, loser_elo = black_elo, white_elo
    else:
        # Draws and unfinished/unknown results are never selected.
        return False

    # Every required gap is positive, so meeting it also guarantees that the
    # winner is the higher-rated player.
    difference = winner_elo - loser_elo

    # (lowest winner rating of the band, minimum rating gap), strongest first.
    elo_bands = (
        (2500, 100),
        (2300, 150),
        (2100, 200),
        (1900, 250),
    )
    for band_floor, min_gap in elo_bands:
        if winner_elo >= band_floor:
            return difference >= min_gap

    # Winner rated below 1900.
    return difference >= 300

In [12]:
def process_game(game):
    """Encode the positions of a game's main line as stacked bitmap rows.

    The walk starts at the node reached by ``game.variation(0)`` (so the
    position held by ``game`` itself is not encoded) and follows
    ``variation(0)`` until ``is_end()`` is true, encoding each node's board
    with ``fm.get_bitmap_plus``.

    Args:
        game: parsed game node. ``game.variation(0)`` is called
            unconditionally, so the game must contain at least one move.

    Returns:
        The result of ``np.vstack`` over the per-position bitmaps, in the
        order the positions were visited.
    """
    node = game.variation(0)
    bitmap_boards = [fm.get_bitmap_plus(node.board())]

    # Encode first, then advance: the final position is included as well.
    while not node.is_end():
        node = node.variation(0)
        bitmap_boards.append(fm.get_bitmap_plus(node.board()))

    return np.vstack(bitmap_boards)

In [15]:
def create_datasets(nb_examples=400):
    """Build the bitmap and label HDF5 files from the PGN files under PGNS_PATH.

    Every game accepted by ``select`` contributes one row per position
    returned by ``process_game``. All rows of a game share one label: 1 when
    the game's ``Result`` header is ``'1-0'``, otherwise 0. Reading stops as
    soon as ``nb_examples`` rows have been stored; the last game is truncated
    if it would overflow.

    Both output files are opened with mode ``'w'`` (existing files are
    replaced) and each receives a single int8 dataset named ``"dataset"``,
    shaped ``(nb_examples, BITMAP_SIZE)`` and ``(nb_examples,)`` respectively.

    Progress is printed whenever at least 5 more percentage points have been
    reached. If the PGN files run out first, a message reports how many rows
    were actually written.

    Args:
        nb_examples: total number of rows to store (default 400).

    Raises:
        ValueError: if ``nb_examples`` is not positive.
    """
    if nb_examples <= 0:
        raise ValueError("nb_examples must be a positive integer, got {!r}".format(nb_examples))

    idx = 0
    current_progress = 0

    # The output files live inside PGNS_PATH and already exist when os.walk
    # lists the directory, so they must not be parsed as PGN input.
    output_files = {os.path.abspath(BITMAPS_PATH), os.path.abspath(LABELS_PATH)}

    with h5py.File(BITMAPS_PATH, 'w') as bitmaps_hdf, h5py.File(LABELS_PATH, 'w') as labels_hdf:
        bitmaps = bitmaps_hdf.create_dataset("dataset", (nb_examples, BITMAP_SIZE), dtype=np.int8)
        labels = labels_hdf.create_dataset("dataset", (nb_examples,), dtype=np.int8)

        for root, _dirs, filenames in os.walk(PGNS_PATH):
            for f in filenames:
                pgn_path = os.path.join(root, f)
                if os.path.abspath(pgn_path) in output_files:
                    continue

                with open(pgn_path, 'r') as pgn:
                    game = chess.pgn.read_game(pgn)

                    while game:
                        if select(game):
                            label = 1 if game.headers['Result'] == '1-0' else 0
                            game_bitmaps = process_game(game)

                            # Truncate the last game so the datasets are never overrun.
                            nb_to_add = min(game_bitmaps.shape[0], nb_examples - idx)

                            bitmaps[idx: idx + nb_to_add] = game_bitmaps[:nb_to_add]
                            labels[idx: idx + nb_to_add] = np.full(nb_to_add, label, dtype=np.int8)
                            idx += nb_to_add

                            progress = (100 * idx) // nb_examples
                            if progress >= current_progress + 5:
                                current_progress = progress
                                print("{}% completed".format(current_progress))

                            if idx >= nb_examples:
                                # Datasets are full: stop everything. A plain
                                # ``break`` would only leave this file's loop and
                                # the remaining files would still be parsed.
                                return

                        # read next game
                        game = chess.pgn.read_game(pgn)

        # Only reached when the PGN files ran out before the datasets were full.
        print("Only {} of {} examples were found; the remaining rows were never written".format(idx, nb_examples))



In [16]:
# Build both HDF5 files (they are opened in 'w' mode, so existing files are
# replaced). Progress is printed while rows are written.
create_datasets()

21% completed
30% completed
50% completed
63% completed
73% completed
88% completed
100% completed


In [18]:
# Sanity check: read the stored labels back and print them.
with h5py.File(LABELS_PATH, 'r') as labels_hdf:
    labels=labels_hdf.get("dataset")
    # Copy into a NumPy array while the file is still open so the values stay
    # usable after the `with` block.
    labels_np=np.array(labels)
    print(labels_np)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [19]:
# Sanity check: read the stored bitmaps back and print them.
with h5py.File(BITMAPS_PATH, 'r') as bitmaps_hdf:
    bitmaps=bitmaps_hdf.get("dataset")
    # Copy into a NumPy array while the file is still open so the values stay
    # usable after the `with` block.
    bitmaps_np=np.array(bitmaps)
    print(bitmaps_np)

[[0 0 0 ... 1 1 0]
 [0 0 0 ... 1 1 1]
 [0 0 0 ... 1 1 0]
 ...
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]]


In [20]:
# Expected: (number of rows in the dataset, BITMAP_SIZE).
bitmaps_np.shape

(400, 773)