In [2]:
import chess
import chess.pgn
import os

In [3]:
def read_games_from_file(file_path):
    """Parse every game stored in a PGN file.

    Games are read one after another with ``chess.pgn.read_game`` until it
    returns ``None``.  A read that raises ``ValueError`` is reported on
    stdout and skipped; reading then continues with the next game.

    Args:
        file_path: Path of the PGN file to open in text mode.

    Returns:
        A list with the successfully parsed games, in file order.
    """
    parsed = []
    with open(file_path, 'r') as handle:
        exhausted = False
        while not exhausted:
            try:
                current = chess.pgn.read_game(handle)
            except ValueError as e:
                print(f"Pominięto partię z powodu błędu: {e}")
                continue
            if current is None:
                # The reader signals end of input by returning None.
                exhausted = True
            else:
                parsed.append(current)
    return parsed

def is_checkmate(game):
    """Tell whether the final mainline position of ``game`` is checkmate.

    The game's own starting board (``game.board()``) is used and every
    mainline move is pushed onto it before the position is tested.

    Args:
        game: Object exposing ``board()`` and ``mainline_moves()``.

    Returns:
        Whatever ``is_checkmate()`` reports for the replayed position.
    """
    position = game.board()
    for ply in game.mainline_moves():
        position.push(ply)
    final_is_mate = position.is_checkmate()
    return final_is_mate

def filter_checkmate_games(games, pgn_mate_path, pgn_draw_path):
    """Split games into checkmates and draws and append them to PGN files.

    A game goes to the checkmate file when ``is_checkmate`` is truthy for
    it; otherwise it goes to the draw file when its ``Result`` header equals
    ``"1/2-1/2"``.  All other games are dropped.  A game that raises while
    being classified is reported on stdout and skipped.

    Args:
        games: Iterable of parsed games.
        pgn_mate_path: File that receives the checkmate games (append mode).
        pgn_draw_path: File that receives the drawn games (append mode).
    """
    mates, draws = [], []
    for cnt, game in enumerate(games):
        try:
            if is_checkmate(game):
                bucket = mates
            elif game.headers["Result"] == "1/2-1/2":
                bucket = draws
            else:
                continue
        except Exception as e:
            print(f"Pominięto partię numer {cnt} z powodu błędu: {e}")
        else:
            bucket.append(game)

    # Both outputs are opened even when their bucket is empty, mate file first.
    for path, bucket in ((pgn_mate_path, mates), (pgn_draw_path, draws)):
        with open(path, "a") as output_file:
            for game in bucket:
                output_file.write(str(game) + "\n\n")


# Directory with the raw PGN dumps.  The extraction loop below also writes
# its outputs (mates.pgn, draw.pgn) into this same directory.
data_path = "../data/raw/"

# os.listdir() returns entries in arbitrary order; sorting makes the order in
# which files are processed (and games are appended) reproducible.
files = sorted(os.listdir(data_path))

# Extraction step (currently disabled).  The generated files are skipped so
# that a second run does not feed mates.pgn / draw.pgn back into themselves.
# for file in files:
#     if file in ("mates.pgn", "draw.pgn"):
#         continue
#     games = read_games_from_file(data_path + file)
#     filter_checkmate_games(games, data_path + "mates.pgn", data_path + "draw.pgn")

In [4]:
# Load the two PGN collections that the export cells below iterate over:
# `games` from mates.pgn and `drawn` from draw.pgn.
mates_pgn_path = data_path + "mates.pgn"
draw_pgn_path = data_path + "draw.pgn"

games = read_games_from_file(mates_pgn_path)
drawn = read_games_from_file(draw_pgn_path)

In [5]:
import csv

# Export every position of the drawn games to drawn.csv, one row per ply:
# [FEN before the move, score (always 0.5), side to move as 0/1].
# NOTE(review): the column order here is fen, score, turn, while the
# mates.csv export writes fen, turn, score.
invalid = 0

# The file is opened once for the whole export instead of once per game, and
# with newline="" as the csv module requires for files it writes to
# (otherwise rows end in "\r\r\n" on Windows).  Append mode is kept, so
# re-running this cell adds the same rows again.
with open(data_path + "drawn.csv", "a", newline="") as f:
    writer = csv.writer(f)

    for cnt, game in enumerate(drawn):
        data = []
        # Replay from the standard starting position (not game.board()); a
        # game whose moves are not legal from there is rejected below.
        board = chess.Board()
        cor = True

        for move in game.mainline_moves():
            # Membership on board.legal_moves checks this single move instead
            # of materialising the full legal-move list on every ply.
            if move in board.legal_moves:
                score = 0.5
                turn = int(board.turn)
                data.append([board.fen(), score, turn])
                board.push(move)
            else:
                cor = False
                invalid += 1
                break

        # Only games that replayed completely contribute rows.
        if cor:
            writer.writerows(data)
        if cnt % 1000 == 0:
            print(f"{cnt}/{len(drawn)}")


print("invalid drawns: ", invalid)


0/82977
1000/82977
2000/82977
3000/82977
4000/82977
5000/82977
6000/82977
7000/82977
8000/82977
9000/82977
10000/82977
11000/82977
12000/82977
13000/82977
14000/82977
15000/82977
16000/82977
17000/82977
18000/82977
19000/82977
20000/82977
21000/82977
22000/82977
23000/82977
24000/82977
25000/82977
26000/82977
27000/82977
28000/82977
29000/82977
30000/82977
31000/82977
32000/82977
33000/82977
34000/82977
35000/82977
36000/82977
37000/82977
38000/82977
39000/82977
40000/82977
41000/82977
42000/82977
43000/82977
44000/82977
45000/82977
46000/82977
47000/82977
48000/82977
49000/82977
50000/82977
51000/82977
52000/82977
53000/82977
54000/82977
55000/82977
56000/82977
57000/82977
58000/82977
59000/82977
60000/82977
61000/82977
62000/82977
63000/82977
64000/82977
65000/82977
66000/82977
67000/82977
68000/82977
69000/82977
70000/82977
71000/82977
72000/82977
73000/82977
74000/82977
75000/82977
76000/82977
77000/82977
78000/82977
79000/82977
80000/82977
81000/82977
82000/82977
invalid drawns:  

In [12]:
import numpy as np

# Export every position of the checkmate games to mates.csv, one row per ply:
# [FEN before the move, side to move as 0/1, score].  The score moves
# linearly from 0.5 at the first ply to the game result at the last one.
# Each game also contributes the mirrored positions (board.mirror()) with
# the opposite result.
invalid = 0

# Opened once for the whole export and with newline="" as the csv module
# requires for files it writes to.  Append mode is kept, so re-running this
# cell adds the same rows again.
with open(data_path + "mates.csv", "a", newline="") as f:
    writer = csv.writer(f)

    for cnt, game in enumerate(games):
        data = []
        data_opp = []
        # Replay from the standard starting position (not game.board()); a
        # game whose moves are not legal from there is rejected below.
        board = chess.Board()
        cor = True

        # NOTE(review): any Result other than "1-0" (including "*" or a draw
        # marker) is scored as 0 here.
        res = game.headers['Result']
        res = 1 if res == "1-0" else 0
        opp_res = 1 - res

        # Materialise the mainline once; it is needed for its length and for
        # the replay.
        moves = list(game.mainline_moves())
        game_len = len(moves)
        default_scores = np.linspace(0.5, res, game_len).tolist()
        opp_scores = np.linspace(0.5, opp_res, game_len).tolist()

        for idx, move in enumerate(moves):
            # Membership on board.legal_moves checks this single move instead
            # of materialising the full legal-move list on every ply.
            if move in board.legal_moves:
                data.append([board.fen(), int(board.turn), default_scores[idx]])

                opp_board = board.mirror()
                data_opp.append(
                    [opp_board.fen(), int(opp_board.turn), opp_scores[idx]]
                )

                board.push(move)
            else:
                cor = False
                invalid += 1
                break

        # Only games that replayed completely contribute rows: the original
        # positions first, then the mirrored ones.
        if cor:
            writer.writerows(data)
            writer.writerows(data_opp)
        if cnt % 1000 == 0:
            print(f"{cnt}/{len(games)}")
print("invalid: ", invalid)

0/51366
1000/51366
2000/51366
3000/51366
4000/51366
5000/51366
6000/51366
7000/51366
8000/51366
9000/51366
10000/51366
11000/51366
12000/51366
13000/51366
14000/51366
15000/51366
16000/51366
17000/51366
18000/51366
19000/51366
20000/51366
21000/51366
22000/51366
23000/51366
24000/51366
25000/51366
26000/51366
27000/51366
28000/51366
29000/51366
30000/51366
31000/51366
32000/51366
33000/51366
34000/51366
35000/51366
36000/51366
37000/51366
38000/51366
39000/51366
40000/51366
41000/51366
42000/51366
43000/51366
44000/51366
45000/51366
46000/51366
47000/51366
48000/51366
49000/51366
50000/51366
51000/51366
invalid:  244


In [19]:
import torch
import chess
# Directory prefix for the files opened below (mates.csv, drawn.txt).  The
# same value is already assigned to data_path earlier in the notebook.
data_path = "../data/raw/"
def create_tensor(board: chess.Board, transform: bool=False):
    """Encode the pieces on ``board`` as a ``(6, 8, 8)`` tensor.

    Plane ``piece_type - 1`` holds one piece type.  A white piece is written
    as ``1`` and a black piece as ``-1``; empty squares stay ``0``.  The
    last axis is the file index.  Without ``transform`` the row index is
    ``7 - rank``; with ``transform`` the row index is the rank itself and
    every value is negated afterwards.

    Args:
        board: Position to encode.
        transform: Use the un-flipped row order and negate the signs.

    Returns:
        The encoded ``torch`` tensor of shape ``(6, 8, 8)``.
    """
    planes = torch.zeros((6, 8, 8))
    for file_idx in range(8):
        for rank_idx in range(8):
            occupant = board.piece_at(chess.square(file_idx, rank_idx))
            if occupant is None:
                continue

            plane = occupant.piece_type - 1
            row = rank_idx if transform else 7 - rank_idx
            planes[plane, row, file_idx] = 1 if occupant.color == chess.WHITE else -1

    if transform:
        planes *= -1
        # Reset near-zero entries to a plain 0 (negating 0.0 yields -0.0).
        near_zero = torch.abs(planes) < 1e-6
        planes = torch.where(near_zero, torch.zeros_like(planes), planes)
    return planes


# Accumulators filled from mates.csv: one feature vector and one label per
# row, plus counters of rows with label > 0.5 (white_win) and all other rows
# (black_win, which therefore also counts rows labelled exactly 0.5).
tensors, labels = [], []
white_win = black_win = 0

with open(data_path+"mates.csv", "r") as mates_csv:
    for idx, (fen, turn, result) in enumerate(csv.reader(mates_csv)):
        # 6*8*8 board planes flattened, followed by the side-to-move flag.
        board_planes = create_tensor(chess.Board(fen)).view(6*8*8)
        turn_flag = torch.tensor([int(turn)])
        tensor = torch.cat([board_planes, turn_flag])
        label = float(result)

        tensors.append(tensor)
        labels.append(label)

        if label > 0.5:
            white_win += 1
        else:
            black_win += 1

        # Progress marker every 100000 rows.
        if idx % 100000 == 0:
            print(f"{idx}" )


0
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000
1500000
1600000
1700000
1800000
1900000
2000000
2100000
2200000
2300000
2400000
2500000
2600000
2700000
2800000
2900000
3000000
3100000
3200000
3300000
3400000
3500000
3600000
3700000
3800000
3900000
4000000
4100000
4200000
4300000
4400000
4500000
4600000
4700000
4800000
4900000
5000000
5100000
5200000
5300000
5400000
5500000
5600000
5700000
5800000
5900000
6000000
6100000
6200000
6300000
6400000
6500000
6600000
6700000
6800000
6900000
7000000
7100000
7200000
7300000
7400000
7500000
7600000
7700000
7800000
7900000
8000000
8100000
8200000
8300000
8400000
8500000
8600000
8700000
8800000
8900000
9000000
9100000
9200000
9300000
9400000
9500000
9600000
9700000
9800000
9900000
10000000
10100000
10200000
10300000
10400000
10500000
10600000
10700000
10800000
10900000
11000000
11100000
11200000
11300000
11400000
11500000
11600000
11700000
11800000
11900000
12000000
12100000
12200000
12300000

In [20]:
# Report how many mates.csv rows were counted on each side of the 0.5 label
# threshold (black first, then white), one per line.
print(
    f"black win: {black_win}",
    f"white win: {white_win}",
    sep="\n",
)

black win: 7813285
white win: 7711041


In [21]:


import random

# Number of drawn positions to add: half of the rows counted in
# black_win + white_win.
draw_num = (black_win + white_win) // 2

# NOTE(review): this reads drawn.txt, while the export cell earlier in the
# notebook writes drawn.csv — confirm which file is intended.
with open(data_path+"drawn.txt", "r") as f:
    # Blank lines carry no position and would fail FEN parsing; drop them.
    data = [line for line in f if line.strip()]

random.seed(42)
random.shuffle(data)

# Slicing never raises: a short list is simply returned whole.
data = data[:draw_num]

draw_tensors = []
for line in data:
    # Each record is "<fen> <score> <turn>", where the FEN itself contains
    # spaces and the two trailing columns may be separated by spaces, tabs or
    # commas.  Splitting on the delimiter instead of slicing at fixed offsets
    # also handles a last line without a trailing newline.
    fields = line.replace(",", " ").split()
    fen, turn = " ".join(fields[:-2]), fields[-1]

    tensor = create_tensor(chess.Board(fen))
    tensor = tensor.view(6*8*8)
    tensor = torch.cat([tensor, torch.tensor([int(turn)])])
    draw_tensors.append(tensor)

# Extend both lists together, and only after every line parsed, so labels and
# tensors cannot get out of step if a line fails half-way through.
tensors.extend(draw_tensors)
labels.extend([0.5] * len(draw_tensors))


In [22]:

# Persist the label list as a tensor, then drop the name so the tensor is
# not kept referenced by this cell.
labels_out_path = "../data/prep/y_turn.pt"

y = torch.tensor(labels)
torch.save(y, labels_out_path)

del y


In [23]:
# Stack the per-position vectors into one tensor, persist it, then drop the
# name so the stacked copy is not kept referenced by this cell.
features_out_path = "../data/prep/X_turn.pt"

x = torch.stack(tensors, dim=0)
torch.save(x, features_out_path)
del x

In [6]:
# Count how many labels are exactly 0, exactly 1 and exactly 0.5.
y = torch.tensor(labels)
count_0, count_1, count_05 = (
    (y == target).sum().item()
    for target in (0, 1, 0.5)
)

print(f"Liczba zer: {count_0}")
print(f"Liczba jedynek: {count_1}")
print(f"Liczba 0.5: {count_05}")

Liczba zer: 7762163
Liczba jedynek: 7762163
Liczba 0.5: 11928218
