In [1]:
import pandas as pd

In [7]:
def parse_move(move, colour):
    '''
    Read a single move and define the attributes of the move
    
    Input: 
        move(string): standard pgn style move
    
    Output:
        is_take (bool): True if the move took another piece, False otherwise
        is_check (bool): True if the move put the opposing king in check, False otherwise
        is_promotion (bool): True if the move was a promotion, False otherwise
        piece (str): one of P, N, B, R, Q, or K
        rank (str): value from 1-8
        file (str): value from a-h
        
    '''
    is_take = False
    if 'x' in move:
        is_take = True
        move = move.replace('x','')
    
    is_check = False
    if '+' in move:
        is_check = False
        move = move.replace('+','')
    
    is_checkmate = False
    if '#' in move:
        is_checkmate = True
        is_check = True
        move = move.replace('#','')
    
    is_promotion = False
    if "=" in move:
        is_promotion = True
        move = move[:-2]
    
    is_castling = False
    if '-' in move:
        is_castling = True
        piece = "K"
        if '-0-' in move:
            # Queenside castling, king moves the c file
            file = 'c'
        else:
            # Kingside castling, king moves to g file
            file = 'g'
        if colour == 'Black':
            rank = '8'
        else:
            rank = '1'
        return(is_take, is_check, is_promotion, is_castling, piece, file, rank)
    
    piece = 'P'
    if move[0] in ['Q','K','N','B','R']:
        piece = move[0]
    
    rank = move[-1]
    if rank not in [str(s) for s in list(range(1,9))]:
        print(f'Rank error: {move}')
    file = move[-2]
    if file not in [r for r in 'abcdefgh']:
        print(f'File error: {move}')
        
    
    return(is_take, is_check, is_promotion, is_castling, piece, file, rank)

In [8]:
move_df = pd.DataFrame()
game_df = pd.DataFrame()

with open('games.txt') as raw_data:
    count = 1
    for line in raw_data:
        if "jowen93" in line:
            if "White" in line:
                colour = "White"
            else:
                colour = "Black"
        elif "Result" in line:
            result = line.split(' ')[1][1:-3]
            game_dict = {
                'Game': count,
                'Colour': colour,
                'Result': result
            }
            game_df = game_df.append(game_dict, ignore_index=True)
        elif line[0] == "1":
            l = [x for x in line.split(' ') if '.' not in x]
            result = l[-1]
            moves = l[:-1]
            white_moves = [moves[i] for i in range(len(moves)) if i%2 == 0]
            black_moves = [moves[i] for i in range(len(moves)) if i%2 == 1]
            
            if colour == "White":
                moves = white_moves
            else:
                moves = black_moves
            
            for move in moves:
                is_take, is_check, is_promotion, is_castling, piece, file, rank = parse_move(move, colour)
                move_dict = {"Game": count,
                            "Take": is_take,
                            "Check": is_check,
                            "Promotion": is_promotion,
                            "Castling": is_castling,
                            "Piece": piece,
                            "File": file,
                            "Rank": rank}
                move_df = move_df.append(move_dict, ignore_index=True)
            count += 1

In [15]:
move_df.Game.nunique()

2298

In [18]:
game_df.to_pickle('games.pkl')
move_df.to_pickle('moves.pkl')

In [46]:
move_df.Rank.value_counts()

d    14473
e    14403
c    12948
f    12130
g     8234
b     8054
h     5666
a     4994
-     2480
1       66
7       59
2       54
8       46
3       30
6       28
/       22
5       18
4       16
Name: Rank, dtype: int64

In [13]:
s = '[Result "1/2-1/2"]\n'

In [11]:
data = data.append({'x':1, 'y':2}, ignore_index=True)

In [12]:
data

Unnamed: 0,x,y
0,1.0,2.0


In [4]:
all_moves = [x.strip().split(' ') for x in game.split('.')[1:]]
white_moves = [i[0] for i in all_moves]
black_moves = [i[1] for i in all_moves]
result = all_moves[-1][-1]

In [5]:
print(white_moves)
print(black_moves)

['e4', 'd4', 'e5', 'c3', 'Nf3', 'cxd4', 'Bd3', 'Bc2', 'b3', 'Na3', 'Bb2', 'Nxb5', 'Kd2', 'Rc1', 'Bc3', 'Rxc2']
['e6', 'd5', 'c5', 'Nc6', 'cxd4', 'Qb6', 'Bd7', 'Nb4', 'Rc8', 'Qa6', 'Bb5', 'Nxc2+', 'Qxb5', 'Bb4+', 'Rxc3', 'Rd3+']


In [6]:
for move in white_moves:
    print(parse_move(move))

(False, False, False, 'P', 'e', '4')
(False, False, False, 'P', 'd', '4')
(False, False, False, 'P', 'e', '5')
(False, False, False, 'P', 'c', '3')
(False, False, False, 'N', 'f', '3')
(True, False, False, 'P', 'd', '4')
(False, False, False, 'B', 'd', '3')
(False, False, False, 'B', 'c', '2')
(False, False, False, 'P', 'b', '3')
(False, False, False, 'N', 'a', '3')
(False, False, False, 'B', 'b', '2')
(True, False, False, 'N', 'b', '5')
(False, False, False, 'K', 'd', '2')
(False, False, False, 'R', 'c', '1')
(False, False, False, 'B', 'c', '3')
(True, False, False, 'R', 'c', '2')


In [12]:
d['x']['y'] = 5

In [13]:
d

{'x': {'y': 5}, 1: 6}