In [None]:
# Mount Google Drive at /content/gdrive; get_dataset() reads its PGN input
# from, and writes its .npy outputs to, paths under /content/gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
pip install chess

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting chess
  Downloading chess-1.9.4-py3-none-any.whl (149 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.1/149.1 KB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: chess
Successfully installed chess-1.9.4


In [None]:
import chess
import numpy as np
import random
from chess import Move

In [None]:
!   wget https://stockfishchess.org/files/stockfish_14_linux_x64_popcnt.zip && \
    unzip stockfish_14_linux_x64_popcnt.zip stockfish_14_linux_x64_popcnt/stockfish_14_x64_popcnt

--2023-01-13 09:15:49--  https://stockfishchess.org/files/stockfish_14_linux_x64_popcnt.zip
Resolving stockfishchess.org (stockfishchess.org)... 172.67.80.249, 104.25.158.9, 104.25.159.9, ...
Connecting to stockfishchess.org (stockfishchess.org)|172.67.80.249|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28531469 (27M) [application/zip]
Saving to: ‘stockfish_14_linux_x64_popcnt.zip’


2023-01-13 09:15:56 (4.43 MB/s) - ‘stockfish_14_linux_x64_popcnt.zip’ saved [28531469/28531469]

Archive:  stockfish_14_linux_x64_popcnt.zip
  inflating: stockfish_14_linux_x64_popcnt/stockfish_14_x64_popcnt  


In [None]:
import chess.engine

In [None]:
# Start the Stockfish binary unpacked by the download cell as a UCI engine.
# get_dataset() reads this module-level `engine` to score positions.
engine = chess.engine.SimpleEngine.popen_uci("/content/stockfish_14_linux_x64_popcnt/stockfish_14_x64_popcnt")

In [None]:
def serialize(brd):
    """Encode a board as six signed 64-square piece planes plus a state flag.

    Args:
        brd: Object exposing ``piece_at(square)`` for squares 0..63, returning
            ``None`` for an empty square or a piece whose ``symbol()`` is one
            of ``PNBRQK`` (upper case) or ``pnbrqk`` (lower case).

    Returns:
        tuple: ``(state, features)`` where

        * ``features`` is a float array of length 384: the pawn, knight,
          bishop, rook, queen and king planes laid end to end. An upper-case
          symbol is stored as ``1``, a lower-case symbol as ``-1`` and
          anything else as ``0``.
        * ``state`` is ``3`` when both the upper-case and the lower-case side
          have at most two knights/bishops/rooks/queens in total, otherwise
          ``2``.
          NOTE(review): presumably 3 = endgame and 2 = middlegame - confirm.
    """
    # symbol -> (plane index, stored value); plane order fixes the layout of
    # the returned vector, so it must stay P, N, B, R, Q, K.
    layout = {}
    for plane, letter in enumerate('PNBRQK'):
        layout[letter] = (plane, 1.0)
        layout[letter.lower()] = (plane, -1.0)

    planes = np.zeros((6, 64), float)
    mark_white = 0
    mark_black = 0

    # Single pass over the board instead of one full scan per piece type.
    for square in range(64):
        piece = brd.piece_at(square)
        if piece is None:
            continue
        entry = layout.get(piece.symbol())
        if entry is None:
            # Unknown symbols are ignored, exactly as before.
            continue
        plane, value = entry
        planes[plane, square] = value
        # Only knights, bishops, rooks and queens (planes 1..4) count
        # towards the material tally; pawns and kings never did.
        if 1 <= plane <= 4:
            if value > 0:
                mark_white += 1
            else:
                mark_black += 1

    state = 3 if (mark_white <= 2 and mark_black <= 2) else 2
    # Row-major flattening yields the same order as concatenating the planes.
    return state, planes.reshape(-1)

In [None]:
def str2float(score, mate_score=None):
    """Convert an engine score string to a float.

    Args:
        score: Score text. A leading ``'#'`` marks a mate score followed by
            the signed mate distance (e.g. ``'#+3'`` or ``'#-2'``); anything
            else is parsed directly as a number (e.g. ``'+20'``, ``'-35'``).
        mate_score: Optional magnitude to return for mate scores. When left
            as ``None`` the bare mate distance is returned, which keeps the
            historical behaviour but makes ``'#+3'`` come out as ``3.0``,
            indistinguishable from a small ordinary score. When given, mate
            scores become ``+mate_score`` or ``-mate_score`` according to the
            sign of the distance.

    Returns:
        float: The parsed score.

    Raises:
        ValueError: If the text (after an optional ``'#'``) is not a number.
    """
    if score.startswith('#'):
        distance = score[1:]
        if mate_score is None:
            return float(distance)
        # Decide the sign from the text so that '#-0' stays negative.
        if distance.startswith('-'):
            return -float(mate_score)
        return float(mate_score)
    return float(score)

# **Generate Dataset**

In [None]:
import chess.pgn

def get_dataset(pgn_path='/content/gdrive/MyDrive/model/data/ficsgamesdb_2020_standard2000_nomovetimes_272524.pgn',
                output_dir='/content/gdrive/MyDrive/model/processed_data',
                max_opening=1500000,
                opening_plies=20,
                depth=8):
    """Build the opening-position dataset from a PGN file and save it as .npy.

    Every game in ``pgn_path`` is replayed move by move. For each of the first
    ``opening_plies`` positions of a game (the counter advances once per
    played move, i.e. per half-move) the board is encoded with ``serialize``
    and scored by the module-level ``engine`` from White's point of view; the
    score text is converted with ``str2float``.

    Reading stops at end of file, or before starting a new game once more
    than ``max_opening`` samples have been collected (the check is per game,
    so the final count can exceed the limit by up to one game's worth).

    Only opening samples are collected; the "middle" and "ending" counters
    are still printed for continuity with earlier runs and stay at 0.

    Args:
        pgn_path: PGN file to read.
        output_dir: Directory that receives ``X_opening_1_<n>.npy`` and
            ``y_opening_1_<n>.npy``, where ``<n>`` is the sample count in
            millions. Created if missing.
        max_opening: Sample count after which no further game is read.
        opening_plies: Number of leading half-moves per game to keep.
        depth: Search depth passed to the engine for each position.

    Returns:
        None. The arrays are written to ``output_dir``.
    """
    import os  # local import: keeps this cell self-contained

    # Fail early on an unusable output location instead of after the
    # (very long) engine run.
    os.makedirs(output_dir, exist_ok=True)

    X_opening, y_opening = [], []
    count_data = 0
    count_opening, count_middle, count_ending = 0, 0, 0
    limit = chess.engine.Limit(depth=depth)

    # The context manager closes the PGN handle even if analysis fails.
    with open(pgn_path) as pgn:
        while count_opening <= max_opening:
            game = chess.pgn.read_game(pgn)
            if game is None:
                break
            board = game.board()
            for number_of_move, move in enumerate(game.mainline_moves(), start=1):
                board.push(move)
                count_data += 1

                if number_of_move <= opening_plies:
                    # Encode and analyse only positions that are kept.
                    _, ser = serialize(board)
                    info = engine.analyse(board, limit)
                    score = str2float(str(info['score'].pov(color=chess.WHITE)))
                    X_opening.append(ser)
                    y_opening.append(score)
                    count_opening += 1

                # Report on every 10000th position seen, whether or not it
                # was an opening position.
                if count_data % 10000 == 0:
                    print(f'data: {count_data}')
                    print(f'opening data: {count_opening}')
                    print(f'middle data: {count_middle}')
                    print(f'ending data: {count_ending}')

    X_opening = np.array(X_opening)
    y_opening = np.array(y_opening)

    # The file-name suffix is the sample count expressed in millions.
    suffix = str(count_opening/1000000)
    pathX_opening = os.path.join(output_dir, 'X_opening_1_' + suffix + '.npy')
    pathy_opening = os.path.join(output_dir, 'y_opening_1_' + suffix + '.npy')
    np.save(pathX_opening, X_opening)
    np.save(pathy_opening, y_opening)

In [None]:
# Build and save the opening dataset; progress counters are printed as it runs.
get_dataset()

data: 990000
opening data: 250798
middle data: 0
ending data: 0
data: 1020000
opening data: 258774
middle data: 0
ending data: 0
data: 1070000
opening data: 271464
middle data: 0
ending data: 0
data: 1160000
opening data: 294251
middle data: 0
ending data: 0
data: 1250000
opening data: 318756
middle data: 0
ending data: 0
data: 1280000
opening data: 326728
middle data: 0
ending data: 0
data: 1370000
opening data: 351868
middle data: 0
ending data: 0
data: 1380000
opening data: 354650
middle data: 0
ending data: 0
data: 1390000
opening data: 357433
middle data: 0
ending data: 0
data: 1400000
opening data: 360434
middle data: 0
ending data: 0
data: 1500000
opening data: 387425
middle data: 0
ending data: 0
data: 1530000
opening data: 396051
middle data: 0
ending data: 0
data: 1580000
opening data: 410025
middle data: 0
ending data: 0
data: 1620000
opening data: 420718
middle data: 0
ending data: 0
data: 1690000
opening data: 438715
middle data: 0
ending data: 0
data: 1730000
opening data