# deeplodocus-dataset-creator

#### За основу было взято: [DeepChess](https://github.com/paintception/DeepChess)

#### Мои итоговые датасеты: [Google drive](https://drive.google.com/drive/folders/1VvwrBkgA9i_8i3D2PHZ_CJ_ni2xFUPlT)

<p style="text-align:center;"><img src="media/logo.png" alt="drawing" width="200"/></p>

## Парсинг игр

### Скачать игры можно [здесь](https://www.ficsgames.org/download.html)
### Рекомендуется скачивать игры между людьми с ELO > 2000 и длинным тайм-контролем

In [None]:
from __future__ import division

import time
import chess
import chess.pgn
import chess.uci
import numpy as np
import os

PATH = "/home/sergey/games/"  # Directory where the individual games are saved after parsing

# Split the downloaded multi-game PGN archive into one PGN file per game.
# Both the archive and every output file are opened with ``with`` so that the
# handles are closed (and buffered data flushed) as soon as they are done.
game_index = 0
with open("ficsgamesdb_2021_chess2000_nomovetimes_304796.pgn") as pgn:  # Downloaded file
    first_game = chess.pgn.read_game(pgn)

    while first_game:
        game_name = first_game.headers['White'] + '-' + first_game.headers['Black']
        print(game_name)
        # The running index keeps the file name unique: two players may meet
        # more than once, and without it a later game would overwrite the
        # earlier one because the file is opened in 'w' mode.
        game_index += 1
        with open(PATH + game_name + '-' + str(game_index) + '.pgn', 'w') as out:
            exporter = chess.pgn.FileExporter(out)
            first_game.accept(exporter)
        first_game = chess.pgn.read_game(pgn)

## Оценка позиций с помощью шахматного движка

In [None]:
GAMES_DIRECTORY = "/home/sergey/games/" # Directory with the games produced by the parsing step
STORING_PATH = '/home/sergey/eval/' # Directory where the engine evaluations are saved

# Engine binaries can be downloaded from https://stockfishchess.org/download/
engine = chess.uci.popen_engine('/home/sergey/stockfish/stockfish-ubuntu-x86-64-avx2') # Path to the engine binary
engine.uci()


def load_game():
    """Evaluate every ``.pgn`` file found under ``GAMES_DIRECTORY``.

    Each file is read as a single game, passed to ``process_game`` and then
    deleted so that an interrupted run can be resumed without re-evaluating
    games. A running count of processed games is printed every 10 games.

    Processing is best-effort: a file that cannot be read, contains no game,
    or fails during evaluation is reported and left on disk, and the walk
    continues with the next file.
    """
    numm = 1
    for root, dirs, filenames in os.walk(GAMES_DIRECTORY):
        for f in filenames:
            if not f.endswith('.pgn'):
                continue

            # Use the directory reported by os.walk, not GAMES_DIRECTORY, so
            # files located in subdirectories are opened and removed correctly.
            game_path = os.path.join(root, f)
            try:
                # Close the handle before removing the file.
                with open(game_path, 'r') as pgn:
                    game = chess.pgn.read_game(pgn)
                if game is None:
                    print('Skipping %s: no game found' % game_path)
                    continue
                process_game(game)
                os.remove(game_path)
            except Exception as exc:
                # ``Exception`` (not a bare except) keeps Ctrl-C working.
                print('Skipping %s: %s' % (game_path, exc))
                continue

            if numm % 10 == 0:
                print(numm)
            numm += 1

def splitter(inputStr, black):
    """Turn an integer bitmask into rows of scaled bits.

    ``inputStr`` (an integer, despite its name) is rendered as a binary
    string zero-padded to at least 64 digits, most significant bit first.
    The string is cut into consecutive groups of 8 characters, and every
    digit is converted to ``int`` and multiplied by ``black``.

    Returns a list of lists of ints (8 rows of 8 values for a 64-bit mask).
    """
    bits = format(inputStr, "064b")
    return [
        [int(bit) * black for bit in bits[start:start + 8]]
        for start in range(0, len(bits), 8)
    ]


def MlpBitmaps(board, e, filename):
    """Append one position and its evaluation to ``filename``.

    For each colour (white first, then black) and each piece type in the
    order pawn, rook, knight, bishop, queen, king, the piece mask taken from
    ``board.pieces(...)`` is converted with ``splitter``; white planes are
    scaled by 1 and black planes by -1. The twelve planes are written in
    that order, each followed by ``;``, and the line is terminated by the
    evaluation ``e`` and a newline.
    """
    piece_order = (
        chess.PAWN,
        chess.ROOK,
        chess.KNIGHT,
        chess.BISHOP,
        chess.QUEEN,
        chess.KING,
    )
    sides = ((chess.WHITE, 1), (chess.BLACK, -1))

    # Build every plane before touching the file, as the output order is
    # all white planes followed by all black planes.
    planes = []
    for colour, sign in sides:
        for piece_type in piece_order:
            mask = int(board.pieces(piece_type, colour))
            planes.append(splitter(mask, sign))

    with open(filename, 'a') as thefile:
        for plane in planes:
            thefile.write("%s;" % plane)
        thefile.write("%s\n" % e)

def GameChecker(board):
    """Return the total weighted material of both sides on ``board``.

    Every pawn counts 1, rook 5, knight 3, bishop 3 and queen 9; kings are
    not counted. White and black material are added together (both with a
    positive sign), so the result is a single non-negative integer.
    """
    piece_values = (
        (chess.PAWN, 1),
        (chess.ROOK, 5),
        (chess.KNIGHT, 3),
        (chess.BISHOP, 3),
        (chess.QUEEN, 9),
    )

    material = 0
    for colour in (chess.WHITE, chess.BLACK):
        for piece_type, value in piece_values:
            # splitter scales every occupied square by the piece value.
            rows = splitter(int(board.pieces(piece_type, colour)), value)
            material += sum(cell for row in rows for cell in row)

    return material

def makeDatasets(board, evaluation, moveCnt):
    """Append the position to the dataset file matching its game phase.

    Parameters:
        board: position to store (passed on to ``MlpBitmaps``).
        evaluation: engine evaluation stored alongside the position.
        moveCnt: index of the position within the game. The caller in this
            file increments it once per half-move, so the threshold of 40
            corresponds to 40 plies.

    Phase selection:
        * ``moveCnt < 40``                      -> ``MlpFileOpening.txt``
        * otherwise, total material ``<= 12``   -> ``MlpFileEnd.txt``
        * otherwise                             -> ``MlpFileMiddle.txt``

    The material total comes from ``GameChecker`` and is only computed when
    the position is past the opening threshold.
    """
    if moveCnt < 40:
        target = 'MlpFileOpening.txt'
    elif GameChecker(board) <= 12:
        target = 'MlpFileEnd.txt'
    else:
        target = 'MlpFileMiddle.txt'

    MlpBitmaps(board, evaluation, STORING_PATH+target)

def process_game(game):
    """Evaluate every position of ``game`` with the engine and store it.

    The main line of the game is replayed on a fresh board. Before each move
    the current position is sent to the module-level ``engine`` and searched
    to a fixed depth; the reported centipawn score is divided by 100 and,
    when black is to move, negated. The game move is then played and the
    resulting board is handed to ``makeDatasets`` together with that score
    and the half-move index.

    Positions for which the engine reports no centipawn value (presumably
    forced-mate scores -- confirm against the engine output) are not stored,
    but the move is still played so the board stays in sync with the game.

    If anything fails while handling a position, ``'Unknown Position'`` is
    printed and processing of this game stops. Retrying the same position
    would fail the same way and never terminate.

    NOTE(review): the score is computed for the position *before* the move,
    while the stored board is the position *after* the move. This pairing is
    kept as in the original code; confirm that it is intended.
    """
    GM_board = chess.Board()
    node = game
    depth = 8  # Engine search depth

    info_handler = chess.uci.InfoHandler()
    engine.info_handlers.append(info_handler)
    tmp = 0  # Half-move index of the current position

    try:
        while not node.is_end():
            try:
                engine.position(GM_board)
                engine.go(depth=depth)

                info = info_handler.info["score"][1]
                next_node = node.variation(0)

                # Remember the side to move before the move is pushed: the
                # engine score is relative to that side.
                white_to_move = GM_board.turn

                # Always play the move, even when no score is stored, so that
                # GM_board keeps following the game.
                GM_move = str(node.board().san(next_node.move))
                GM_board.push_san(GM_move)

                if info[0] is not None:
                    stock_evaluation = info[0]/100
                    if not white_to_move:
                        # Invert the evaluation for black
                        stock_evaluation = -1 * stock_evaluation
                    makeDatasets(GM_board, stock_evaluation, tmp)
            except Exception:
                # ``Exception`` rather than a bare except keeps Ctrl-C working.
                print('Unknown Position')
                break

            node = next_node
            tmp = tmp + 1
    finally:
        # Detach the handler so handlers do not pile up on the shared engine
        # across games.
        engine.info_handlers.remove(info_handler)

# Evaluate every parsed game found under GAMES_DIRECTORY
load_game()

## Получаем итоговые датасеты

In [None]:
STORING_PATH = "/home/sergey/final/" # Directory where the final datasets are saved

def NormalizeData(data):
    """Min-max scale ``data`` into the range [0, 1].

    Parameters:
        data: sequence or array of numbers.

    Returns:
        A float ``numpy.ndarray`` with ``(x - min) / (max - min)`` for every
        element. An empty input yields an empty array, and an input whose
        values are all equal yields zeros (instead of dividing by zero and
        producing NaN).
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        return values

    lowest = np.min(values)
    span = np.max(values) - lowest
    if span == 0:
        return np.zeros_like(values)
    return (values - lowest) / span

import ast

X = []
y = []
idx = 1
skipped = 0  # Number of lines that could not be parsed

with open("/home/sergey/eval/merge.txt", 'r') as f: # File with the evaluations
    for line in f:
        record = line.split(";")
        try:
            # The first 12 fields are nested Python list literals;
            # literal_eval parses them without executing arbitrary code.
            pieces = [ast.literal_eval(x) for x in record[0:12]]
            # Flatten twice: planes -> rows -> individual squares.
            piece = [item for sublist in pieces for item in sublist]
            piece = [item for sublist in piece for item in sublist]
            # strip() removes only the line terminator; slicing off a fixed
            # two characters also dropped the last digit of the evaluation.
            label = float(record[12].strip())
        except (ValueError, SyntaxError, TypeError, IndexError):
            skipped += 1
            continue

        # Append only after both parts parsed, so X and y stay aligned.
        X.append(piece)
        y.append(label)

        if idx % 1000 == 0:
            print(idx)
        idx += 1

if skipped:
    print('Skipped %d malformed lines' % skipped)

y = NormalizeData(y).tolist()

# Show one sample as a sanity check (only if there is a second record).
if len(X) > 1:
    print(X[1])
    print(y[1])

print(len(X))
print(len(y))

np.save(STORING_PATH+'Positions.npy', X)
np.save(STORING_PATH+'Labels.npy', y)