In [1]:
from dataset_management import DatasetManager, SimpleEncoder, EncodingType
from typing import List

In [2]:
import chess
import chess.pgn
import sys

In [3]:
import tqdm
import random
import itertools
import os

In [4]:
def read_games(path : str) -> List[chess.pgn.Game]:
    """Load every game from the PGN file at ``path``.

    Games are parsed one at a time with ``chess.pgn.read_game`` until it
    returns ``None``. A progress line is rewritten in place (``end='\\r'``)
    each time the number of games read so far is a multiple of 100, and a
    summary is printed once the file is exhausted.

    Args:
        path: Location of the PGN file; opened in text mode for reading.

    Returns:
        The parsed games, in the order they appear in the file.
    """
    games = []
    with open(path, 'r') as file:
        print(f"Processing file {path}")
        # Compare against None by identity: `!=` would dispatch to whatever
        # __ne__ the game object defines, while the end-of-input marker is
        # the None singleton itself.
        while (game := chess.pgn.read_game(file)) is not None:
            if len(games) % 100 == 0:
                print(f"{len(games)} games processed so far...", end='\r')
            games.append(game)
        # Blank out the in-place progress line before printing the summary.
        print(100*' ', end='\r')
        print("Processing finished")
        print(f"{len(games)} games have been loaded from the file {path}")
    return games

In [5]:
def game_equality(g1 : chess.pgn.Game, g2 : chess.pgn.Game, s : str):
    """Assert that two games have identical mainlines.

    The mainline moves of both games are walked in lockstep and compared by
    their UCI strings. The games must also contain the same number of moves.

    Args:
        g1: First game (the reference).
        g2: Second game (the one under test).
        s: Context prefix placed at the start of the assertion message.

    Raises:
        AssertionError: If a pair of moves differs, or if one mainline ends
            before the other.
    """
    # zip_longest pads the shorter mainline with None. A plain zip would stop
    # at the shorter game and silently accept a truncated (or extended) copy.
    paired_moves = itertools.zip_longest(g1.mainline_moves(), g2.mainline_moves())
    for i, (m1, m2) in enumerate(paired_moves):
        assert m1 is not None and m2 is not None, \
        s + f', games differ in length at move number: {i}'
        assert m1.uci() == m2.uci(), \
        s + f', move number: {i}, first game: {m1.uci()}, second game: {m2.uci()}'

def test_encoder(games : List[chess.pgn.Game], encoder : EncodingType):
    """Round-trip every game through ``encoder`` and check the moves survive.

    Each game is passed to ``encoder.encode_pgn``, the result is passed to
    ``encoder.decode_to_pgn``, and the decoded game is compared with the
    original via ``game_equality``.

    Args:
        games: Reference games to encode and decode.
        encoder: Object providing ``encode_pgn`` and ``decode_to_pgn``.

    Raises:
        AssertionError: Propagated from ``game_equality`` when a decoded game
            does not match its original.
    """
    # Wrap the list itself, not the enumerate object: enumerate has no length,
    # so tqdm could only show a bare counter ("9091it") with no total or ETA.
    for j, game in enumerate(tqdm.tqdm(games)):
        encoding = encoder.encode_pgn(game)
        decoded = encoder.decode_to_pgn(encoding)
        game_equality(game, decoded, f'game number: {j}')

In [6]:
def test_game_generator_correctness(games : List[chess.pgn.Game], dataset : DatasetManager, n : int):
    """Check ``dataset.games(l, r)`` against the reference list on random ranges.

    For each of ``n`` tests two random indices into ``games`` are drawn and
    ordered so that ``l <= r``. Every game yielded by ``dataset.games(l, r)``
    is then compared with ``games[l + i]``, where ``i`` is its position in the
    yielded sequence.

    Args:
        games: Reference games, indexed the same way as the dataset.
        dataset: Dataset whose ``games(l, r)`` generator is under test.
        n: Number of random ranges to check.

    Raises:
        AssertionError: Propagated from ``game_equality`` on the first
            mismatching game.
    """
    print(f'Running {n} tests')
    total_games_tested = 0
    for _ in tqdm.tqdm(range(n)):
        l, r = random.randint(0, len(games)-1), random.randint(0, len(games)-1)
        if r < l:
            l, r = r, l
        # Count the comparisons that really happened instead of assuming the
        # generator yields r-l+1 games; otherwise a generator that yields
        # fewer (or no) games would still be reported as fully checked.
        # NOTE(review): the interval label below suggests [l, r] is inclusive;
        # if that is the contract, consider asserting games_in_range == r-l+1.
        games_in_range = 0
        for i, game in enumerate(dataset.games(l, r)):
            original_game = games[l+i]
            game_equality(original_game, game, f'interval: [{l}, {r}], game number: {l+i}')
            games_in_range += 1
        total_games_tested += games_in_range
        tqdm.tqdm.write(f'{total_games_tested} games checked so far')
    print(f'Finished, {n} tests passed, {total_games_tested} games checked in total')
            

In [7]:
def test_position_selector_correctness(games : List[chess.pgn.Game], dataset : DatasetManager, n : int):
    """Check ``dataset.game_by_position_id`` on ``n`` randomly chosen games.

    A position id is computed as the number of mainline moves in all earlier
    games plus the move's index within its own game. For every move of each
    sampled game, the lookup must return that move index together with a game
    whose mainline matches the reference game.

    Args:
        games: Reference games, in the same order as in the dataset.
        dataset: Dataset whose ``game_by_position_id`` lookup is under test.
        n: Number of games to sample (with replacement).

    Raises:
        AssertionError: If a returned move index is wrong, or propagated from
            ``game_equality`` when the returned game differs.
    """
    print(f'Running {n} tests')

    # first_position_of[k] is the position id of move 0 of game k
    # (a running sum of mainline lengths, starting from 0).
    first_position_of = [0]
    for reference in games:
        mainline_length = sum(1 for _ in reference.mainline_moves())
        first_position_of.append(first_position_of[-1] + mainline_length)

    positions_checked = 0
    for test_number in tqdm.tqdm(range(n)):
        game_id = random.randint(0, len(games)-1)
        reference = games[game_id]
        offset = first_position_of[game_id]
        for expected_move_id, _ in enumerate(reference.mainline_moves()):
            position_id = offset + expected_move_id
            move_id, found_game = dataset.game_by_position_id(position_id)
            assert expected_move_id == move_id, "wrong move index"
            game_equality(reference, found_game, f'position id: {position_id}')
            positions_checked += 1
        # Report progress on every tenth test only, to keep the output short.
        if test_number % 10 == 0:
            tqdm.tqdm.write(f'{positions_checked} positions checked so far')
    print(f'Finished, {n} tests passed, {positions_checked} positions checked in total')


In [8]:
# Build the DatasetManager for 'my_dataset' with create_empty=True
# (presumably starts from an empty dataset instead of loading one — confirm in dataset_management).
dataset = DatasetManager('my_dataset', create_empty=True)

In [9]:
# Encoder instance that is later passed to test_encoder for the encode/decode round-trip check.
encoder = SimpleEncoder()

In [10]:
# Add the lichess_elite_2016-01 PGN file to the dataset; the path is relative to the working directory.
dataset.add_pgn_file(os.path.join('Lichess Elite Database', 'lichess_elite_2016-01.pgn'))

max_number_of_games: 1000
Chunk len: 1000
number_of_positions: 78882
max_number_of_games: 2000
Chunk len: 1000
number_of_positions: 158447
max_number_of_games: 3000
Chunk len: 1000
number_of_positions: 238136
max_number_of_games: 4000
Chunk len: 1000
number_of_positions: 320212
max_number_of_games: 5000
Chunk len: 1000
number_of_positions: 400211
max_number_of_games: 6000
Chunk len: 1000
number_of_positions: 481340
max_number_of_games: 7000
Chunk len: 1000
number_of_positions: 560873
max_number_of_games: 8000
Chunk len: 1000
number_of_positions: 639921
max_number_of_games: 9000
Chunk len: 1000
number_of_positions: 718378
max_number_of_games: 10000
Chunk len: 91
number_of_positions: 725493


In [11]:
# Parse the same PGN file directly with chess.pgn to get the reference games the checks compare against.
games = read_games(os.path.join('Lichess Elite Database', 'lichess_elite_2016-01.pgn'))

Processing file /home/ereonzis/Dokumenty/ProjectPhoenix/dataset/Lichess Elite Database/lichess_elite_2016-01.pgn
Processing finished                                                                                 
9091 games have been loaded from the file /home/ereonzis/Dokumenty/ProjectPhoenix/dataset/Lichess Elite Database/lichess_elite_2016-01.pgn


In [12]:
# Round-trip every reference game through the encoder and compare the moves.
test_encoder(games, encoder)

9091it [00:16, 541.26it/s]


In [13]:
# Check the position-id lookup on 1000 randomly sampled games.
test_position_selector_correctness(games, dataset, 1000)

Running 1000 tests


  0%|          | 2/1000 [00:00<03:11,  5.21it/s]

88 positions checked so far


  1%|          | 11/1000 [00:02<04:05,  4.03it/s]

940 positions checked so far


  2%|▏         | 22/1000 [00:05<02:49,  5.76it/s]

1701 positions checked so far


  3%|▎         | 30/1000 [00:07<04:58,  3.25it/s]

2537 positions checked so far


  4%|▍         | 41/1000 [00:10<05:20,  2.99it/s]

3520 positions checked so far


  5%|▌         | 51/1000 [00:12<03:26,  4.59it/s]

4305 positions checked so far


  6%|▌         | 62/1000 [00:16<04:05,  3.83it/s]

5302 positions checked so far


  7%|▋         | 73/1000 [00:19<03:07,  4.94it/s]

6206 positions checked so far


  8%|▊         | 82/1000 [00:20<02:47,  5.47it/s]

6757 positions checked so far


  9%|▉         | 92/1000 [00:24<04:20,  3.48it/s]

7629 positions checked so far


 10%|█         | 103/1000 [00:27<02:54,  5.14it/s]

8539 positions checked so far


 11%|█         | 112/1000 [00:29<02:40,  5.54it/s]

9276 positions checked so far


 12%|█▏        | 121/1000 [00:31<04:01,  3.64it/s]

9958 positions checked so far


 13%|█▎        | 131/1000 [00:34<04:38,  3.12it/s]

10770 positions checked so far


 14%|█▍        | 141/1000 [00:37<05:54,  2.42it/s]

11745 positions checked so far


 15%|█▌        | 153/1000 [00:40<02:13,  6.36it/s]

12619 positions checked so far


 16%|█▌        | 161/1000 [00:41<01:43,  8.09it/s]

13175 positions checked so far


 17%|█▋        | 172/1000 [00:43<02:32,  5.43it/s]

13888 positions checked so far


 18%|█▊        | 181/1000 [00:45<02:18,  5.90it/s]

14547 positions checked so far


 19%|█▉        | 192/1000 [00:47<02:23,  5.64it/s]

15354 positions checked so far


 20%|██        | 201/1000 [00:49<02:00,  6.61it/s]

16064 positions checked so far


 21%|██        | 212/1000 [00:51<02:39,  4.94it/s]

16983 positions checked so far


 22%|██▏       | 222/1000 [00:53<02:01,  6.41it/s]

17652 positions checked so far


 23%|██▎       | 231/1000 [00:56<03:32,  3.61it/s]

18488 positions checked so far


 24%|██▍       | 241/1000 [00:58<03:36,  3.51it/s]

19304 positions checked so far


 25%|██▌       | 251/1000 [01:01<03:41,  3.39it/s]

20113 positions checked so far


 26%|██▌       | 261/1000 [01:06<05:16,  2.33it/s]

20984 positions checked so far


 27%|██▋       | 271/1000 [01:08<02:25,  5.02it/s]

21643 positions checked so far


 28%|██▊       | 282/1000 [01:10<02:43,  4.39it/s]

22444 positions checked so far


 29%|██▉       | 291/1000 [01:13<03:47,  3.12it/s]

23195 positions checked so far


 30%|███       | 301/1000 [01:15<02:13,  5.23it/s]

23879 positions checked so far


 31%|███▏      | 313/1000 [01:18<02:02,  5.60it/s]

24663 positions checked so far


 32%|███▏      | 322/1000 [01:20<02:21,  4.78it/s]

25485 positions checked so far


 33%|███▎      | 331/1000 [01:22<02:10,  5.14it/s]

26295 positions checked so far


 34%|███▍      | 342/1000 [01:23<01:51,  5.92it/s]

26966 positions checked so far


 35%|███▌      | 351/1000 [01:25<02:08,  5.04it/s]

27732 positions checked so far


 36%|███▌      | 361/1000 [01:27<02:29,  4.28it/s]

28544 positions checked so far


 37%|███▋      | 372/1000 [01:29<01:30,  6.97it/s]

29214 positions checked so far


 38%|███▊      | 381/1000 [01:31<01:59,  5.20it/s]

29925 positions checked so far


 39%|███▉      | 391/1000 [01:32<01:54,  5.32it/s]

30643 positions checked so far


 40%|████      | 401/1000 [01:35<02:29,  4.01it/s]

31393 positions checked so far


 41%|████      | 411/1000 [01:36<01:34,  6.23it/s]

32054 positions checked so far


 42%|████▏     | 421/1000 [01:38<02:01,  4.76it/s]

32783 positions checked so far


 43%|████▎     | 431/1000 [01:40<02:21,  4.02it/s]

33639 positions checked so far


 44%|████▍     | 441/1000 [01:43<02:37,  3.55it/s]

34416 positions checked so far


 45%|████▌     | 451/1000 [01:48<09:04,  1.01it/s]

35143 positions checked so far


 46%|████▌     | 462/1000 [01:51<02:14,  4.01it/s]

36049 positions checked so far


 47%|████▋     | 472/1000 [01:53<01:17,  6.84it/s]

36807 positions checked so far


 48%|████▊     | 481/1000 [01:54<01:07,  7.65it/s]

37556 positions checked so far


 49%|████▉     | 492/1000 [01:56<01:30,  5.59it/s]

38441 positions checked so far


 50%|█████     | 501/1000 [01:58<01:13,  6.77it/s]

39256 positions checked so far


 51%|█████     | 511/1000 [02:00<01:14,  6.56it/s]

40142 positions checked so far


 52%|█████▏    | 521/1000 [02:01<01:01,  7.79it/s]

40855 positions checked so far


 53%|█████▎    | 531/1000 [02:03<01:04,  7.28it/s]

41634 positions checked so far


 54%|█████▍    | 542/1000 [02:05<00:56,  8.14it/s]

42329 positions checked so far


 55%|█████▌    | 553/1000 [02:07<01:14,  6.00it/s]

43229 positions checked so far


 56%|█████▌    | 562/1000 [02:08<01:00,  7.25it/s]

43860 positions checked so far


 57%|█████▋    | 572/1000 [02:10<01:47,  3.98it/s]

44762 positions checked so far


 58%|█████▊    | 582/1000 [02:13<01:31,  4.54it/s]

45729 positions checked so far


 59%|█████▉    | 592/1000 [02:14<01:19,  5.12it/s]

46504 positions checked so far


 60%|██████    | 600/1000 [02:17<02:22,  2.80it/s]

47298 positions checked so far


 61%|██████    | 612/1000 [02:19<01:03,  6.08it/s]

48045 positions checked so far


 62%|██████▏   | 622/1000 [02:20<01:17,  4.89it/s]

48855 positions checked so far


 63%|██████▎   | 631/1000 [02:22<01:00,  6.06it/s]

49659 positions checked so far


 64%|██████▍   | 641/1000 [02:24<00:46,  7.69it/s]

50413 positions checked so far


 65%|██████▌   | 651/1000 [02:25<00:53,  6.47it/s]

51100 positions checked so far


 66%|██████▋   | 663/1000 [02:27<00:39,  8.56it/s]

51800 positions checked so far


 67%|██████▋   | 672/1000 [02:28<00:42,  7.73it/s]

52388 positions checked so far


 68%|██████▊   | 682/1000 [02:31<01:06,  4.79it/s]

53297 positions checked so far


 69%|██████▉   | 692/1000 [02:33<00:53,  5.80it/s]

54113 positions checked so far


 70%|███████   | 702/1000 [02:35<00:59,  5.00it/s]

54978 positions checked so far


 71%|███████   | 712/1000 [02:37<01:22,  3.50it/s]

55868 positions checked so far


 72%|███████▏  | 722/1000 [02:39<00:48,  5.71it/s]

56591 positions checked so far


 73%|███████▎  | 731/1000 [02:40<00:45,  5.97it/s]

57297 positions checked so far


 74%|███████▍  | 741/1000 [02:42<00:43,  5.94it/s]

58085 positions checked so far


 75%|███████▌  | 750/1000 [02:44<00:34,  7.32it/s]

58692 positions checked so far


 76%|███████▌  | 762/1000 [02:45<00:24,  9.60it/s]

59323 positions checked so far


 77%|███████▋  | 772/1000 [02:47<00:29,  7.67it/s]

59975 positions checked so far


 78%|███████▊  | 781/1000 [02:48<00:51,  4.24it/s]

60746 positions checked so far


 79%|███████▉  | 791/1000 [02:52<01:27,  2.38it/s]

61698 positions checked so far


 80%|████████  | 802/1000 [02:56<00:38,  5.16it/s]

62584 positions checked so far


 81%|████████  | 811/1000 [02:58<00:43,  4.38it/s]

63381 positions checked so far


 82%|████████▏ | 821/1000 [03:01<00:48,  3.68it/s]

64361 positions checked so far


 83%|████████▎ | 831/1000 [03:04<00:51,  3.26it/s]

65141 positions checked so far


 84%|████████▍ | 841/1000 [03:07<00:52,  3.04it/s]

65977 positions checked so far


 85%|████████▌ | 851/1000 [03:10<00:42,  3.54it/s]

66780 positions checked so far


 86%|████████▌ | 861/1000 [03:13<00:36,  3.76it/s]

67629 positions checked so far


 87%|████████▋ | 871/1000 [03:16<00:27,  4.66it/s]

68385 positions checked so far


 88%|████████▊ | 881/1000 [03:18<00:35,  3.35it/s]

69191 positions checked so far


 89%|████████▉ | 892/1000 [03:21<00:31,  3.41it/s]

69969 positions checked so far


 90%|█████████ | 901/1000 [03:24<00:29,  3.31it/s]

70658 positions checked so far


 91%|█████████▏| 913/1000 [03:27<00:13,  6.23it/s]

71398 positions checked so far


 92%|█████████▏| 922/1000 [03:29<00:16,  4.65it/s]

72151 positions checked so far


 93%|█████████▎| 931/1000 [03:31<00:14,  4.71it/s]

72811 positions checked so far


 94%|█████████▍| 941/1000 [03:35<00:44,  1.33it/s]

73695 positions checked so far


 95%|█████████▌| 952/1000 [03:38<00:12,  3.87it/s]

74381 positions checked so far


 96%|█████████▌| 961/1000 [03:40<00:09,  4.16it/s]

75107 positions checked so far


 97%|█████████▋| 972/1000 [03:43<00:07,  3.59it/s]

75946 positions checked so far


 98%|█████████▊| 982/1000 [03:46<00:03,  4.94it/s]

76680 positions checked so far


 99%|█████████▉| 991/1000 [03:48<00:01,  4.77it/s]

77279 positions checked so far


100%|██████████| 1000/1000 [03:52<00:00,  4.31it/s]

Finished, 1000 tests passed, 78175 positions checked in total





In [14]:
# Compare 10 random game ranges from the dataset against the reference list.
test_game_generator_correctness(games, dataset, 10)

Running 10 tests


 10%|█         | 1/10 [00:05<00:45,  5.04s/it]

3015 games checked so far


 10%|█         | 1/10 [00:07<01:09,  7.69s/it]


KeyboardInterrupt: 

In [None]:
# Test loading an existing dataset: rebuild the manager for 'my_dataset' with create_empty=False.
dataset = DatasetManager('my_dataset', create_empty=False)

In [None]:
# Repeat the range check against the manager constructed with create_empty=False.
test_game_generator_correctness(games, dataset, 10)

In [None]:
# Add a second PGN file (lichess_elite_2015-11) to the dataset.
dataset.add_pgn_file(os.path.join('Lichess Elite Database', 'lichess_elite_2015-11.pgn'))

In [None]:
# Append the second file's games to the reference list.
# NOTE: extend mutates `games` in place, so re-running this cell appends the same games again.
games.extend(read_games(os.path.join('Lichess Elite Database', 'lichess_elite_2015-11.pgn')))

In [None]:
# Re-run the range check now that `games` has been extended with the second file.
test_game_generator_correctness(games, dataset, 10)