In [92]:
import os
import re
from collections import defaultdict
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from multiprocessing import Process

import chess
from chess.pgn import read_game

import numpy as np

In [133]:
def extract_moves(game):
    # Takes a game from the pgn and creates list of the board state and the next
    # move that was made from that position.  The next move will be our 
    # prediction target when we turn this data over to the ConvNN.
    positions = list()
    board = chess.Board()
    moves = list(game.main_line())
    for move in moves:
        position, move_code = board.fen(), move.uci()
        positions.append([position, move_code])
        board.push(move)     
    return positions

def replace_nums(line):
    # This function cycles through a string which represents one line on the
    # chess board from the FEN notation.  It will then swap out the numbers
    # for an equivalent number of spaces.
    return ''.join([' '*8 if h=='8' else ' '*int(h) if h.isdigit() else'\n'if h=='/'else ''+h for h in line])
    
def split_fen(fen):
    # Takes the fen string and splits it into its component lines corresponding
    # to lines on the chess board and the game status. 
    fen_comps = fen.split(' ', maxsplit = 1)
    board = fen_comps[0].split('/')
    status = fen_comps[1]
    board = [replace_nums(line) for line in board]
    return board, status

def list_to_matrix(board_list):
    # Converts a list of strings into a numpy array by first 
    # converting each string into a list of its characters. 
    pos_list = [list(line) for line in board_list]
    return np.array(pos_list)

def channelize(mat):
    # processes a board into a 8 x 8 x 6 matrix where there is a 
    # channel for each type of piece.  1's correspond to white, and 
    # -1's correpond to black.
    output = np.empty([8, 8, 6])
    wpcs = ['P', 'R', 'N', 'B', 'Q', 'K']
    bpcs = ['p', 'r', 'n', 'b', 'q', 'k']
    positions = [np.isin(mat, pc).astype('int') - np.isin(mat, bpcs[i]).astype('int') for i, pc in enumerate(wpcs)]
    return np.stack(positions)

def process_status(status):
    # The last combination of characters in the FEN notation convey some different pieces of information
    # like the player who is to move next, and who can still castle. 
    # I have written the code to extract all of the different pieces, but the Agent will only need to know next_to_move. 
    splt = status.split(" ")
    next_to_move = splt[0]
    castling = splt[1]
    en_passant = splt[2]
    half_clock = splt[3]
    full_clock = splt[4]
    return next_to_move

def process_game(positions):
    # Takes a single game from a pgn and produces a dict of dicts which contains 
    # the board state, the next player to move, and the what the next move was (the prediction task).
    boards = []
    next_to_move = []
    for position in positions:
        board, status = split_fen(position[0])
        boards.append(channelize(list_to_matrix(board)))        
        next_to_move.append([process_status(status), position[1]])
    try:
        boards, ntm = np.stack(boards), np.stack(next_to_move)
    except:
        return [], []
    return boards, ntm

def read_and_process(iteration):
    gm = read_game(pgn)
    positions = extract_moves(gm)
    boards, next_to_move = process_game(positions)
    print("".join(["Completed: ", str(iteration),]))
    return boards, next_to_move

def wrangle_data_ip(num_games=10000, save_file=False, filename="chess matrices"):
    pool = ThreadPool(2)
    results = pool.map(read_and_process, range(num_games))
    pool.close() 
    pool.join()
    #if save_file:
    #    np.savez_compressed('chess_games', boards, next_to_move)
    return results

def wrangle_data(num_games=10000, save_file=False):
    boards, next_to_move = read_and_process(0)
    for i in range(1, num_games):
        new_boards, new_next_to_move = read_and_process(i)
        boards, next_to_move = np.concatenate((boards, new_boards), axis=0), np.concatenate((next_to_move, new_next_to_move), axis=0)
    if save_file:
        np.savez_compressed('data/chess_games', boards, next_to_move)
    return boards, next_to_move

In [134]:
with open('data/KingBase2017-A00-A39.pgn') as pgn:
    results = wrangle_data_ip(num_games=100, save_file=True)

error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'xa1' in rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Rfb1' in rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bay

error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'g5' in rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Kf7' in rnbqkbnr/ppppp1pp/8/5p2/6P1/8/PPPPPP1P/RNBQKBNR w KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\b

error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Bc4' in rnbqkbnr/ppppppp1/8/7p/3P4/7P/PPP1PPP1/RNBQKBNR b KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Ra4' in rnbqkbnr/ppppp1pp/8/5p2/6P1/8/PPPPPP1P/RNBQKBNR w KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\e

error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Bc8' in rnbqkbnr/ppppppp1/8/7p/3P4/7P/PPP1PPP1/RNBQKBNR b KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Rb1' in rnbqkbnr/ppppp1pp/8/5p2/6P1/8/PPPPPP1P/RNBQKBNR w KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\e

error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Kf6' in rnbqkbnr/ppppppp1/8/7p/3P4/7P/PPP1PPP1/RNBQKBNR b KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Bd7' in rnbqkbnr/ppppppp1/8/7p/3P4/7P/PPP1PPP1/RNBQKBNR b KQkq - 0 2

Completed: 0
Completed: 1
Completed: 2
Completed: 3
Completed: 4



error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Nd3' in rnbqkbnr/ppppppp1/8/7p/3P4/7P/PPP1PPP1/RNBQKBNR b KQkq - 0 2
error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\pgn.py", line 936, in read_game
    move = board_stack[-1].parse_san(token)
  File "C:\Anaconda3\envs\bayes\lib\site-packages\chess\__init__.py", line 2737, in parse_san
    raise ValueError("illegal san: {0} in {1}".format(repr(san), self.fen()))
ValueError: illegal san: 'Kd4' in rnbqkbnr/ppppppp1/8/7p/3P4/7P/PPP1PPP1/RNBQKBNR b KQkq - 0 2


Completed: 5
Completed: 6
Completed: 7
Completed: 8
Completed: 9
Completed: 10
Completed: 11
Completed: 13
Completed: 12
Completed: 26
Completed: 27
Completed: 14
Completed: 15
Completed: 16
Completed: 28
Completed: 29
Completed: 30
Completed: 17
Completed: 18
Completed: 31
Completed: 32
Completed: 19
Completed: 20
Completed: 33
Completed: 34
Completed: 35
Completed: 36Completed: 21

Completed: 37
Completed: 38
Completed: 22
Completed: 39Completed: 23

Completed: 24
Completed: 40
Completed: 41
Completed: 42
Completed: 25
Completed: 43
Completed: 44
Completed: 52
Completed: 45
Completed: 46
Completed: 53
Completed: 47
Completed: 54
Completed: 55
Completed: 56
Completed: 57
Completed: 58
Completed: 59
Completed: 48
Completed: 60
Completed: 61
Completed: 49
Completed: 62
Completed: 63
Completed: 64
Completed: 50
Completed: 65
Completed: 66
Completed: 67
Completed: 68
Completed: 69
Completed: 51
Completed: 70
Completed: 78
Completed: 79Completed: 71

Completed: 80
Completed: 72
Completed: 

In [142]:
file = np.load('data/chess_matrices.npz')
file['arr_1']

array([['w', 'g1f3'],
       ['b', 'g8f6'],
       ['w', 'c2c4'],
       ..., 
       ['b', 'd4d3'],
       ['w', 'f3f2'],
       ['b', 'd7d4']],
      dtype='<U5')