In [1]:
import chess.pgn
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
sns.set(style="white")

In [2]:
## source: https://github.com/dsharpc/ChessNetworks/blob/master/data_parser/notebooks/Parser.ipynb
class Match:
    """Follow every piece of one parsed game from its starting square.

    The tracker is a dict keyed by each piece's starting square number
    (0 = a1 ... 63 = h8).  Each value is a dict with the keys:

    * ``piece``       - label ``"<unicode symbol>-<starting square>"``, e.g. ``"♙-e2"``
    * ``moves``       - destination squares the piece visited, in order
    * ``last_square`` - square the piece currently stands on (or died on)
    * ``captured``    - True once the piece has been taken
    * ``captures``    - labels of the pieces this piece has taken

    ``game`` is expected to behave like a python-chess ``chess.pgn.Game``:
    it must provide ``headers.get``, ``mainline_moves()`` and ``board()``.
    """

    # First character of a pawn's label, used to recognise en passant.
    _PAWN_SYMBOLS = ('♙', '♟')

    # (king from, king to) -> (rook from, rook to) for the four castling moves.
    _CASTLING_ROOK_MOVES = {
        (4, 6): (7, 5),       # white king side
        (4, 2): (0, 3),       # white queen side
        (60, 62): (63, 61),   # black king side
        (60, 58): (56, 59),   # black queen side
    }

    def __init__(self, game):
        """Read headers and moves from ``game`` and build an unfilled tracker."""
        self.game = game
        # The id is the last path segment of the Site header (presumably the
        # game URL); a missing header yields '' instead of an AttributeError.
        self.game_id = (game.headers.get('Site') or '').split('/')[-1]
        self.moves = self.get_moves()
        self.tracker = game.board().piece_map()
        self.start_tracker()
        # Set by fill_tracker so that replaying the moves twice is a no-op.
        self._tracker_filled = False
        self.black_elo = game.headers.get('BlackElo')
        self.white_elo = game.headers.get('WhiteElo')

    @staticmethod
    def castling_move_rook(from_square, to_square):
        """Return the rook relocation that accompanies a castling king move.

        Returns ``{'piece': <rook starting square>, 'move': (from, to)}`` for
        the four castling king moves, and ``None`` for anything else.
        """
        rook_move = Match._CASTLING_ROOK_MOVES.get((from_square, to_square))
        if rook_move is None:
            return None
        return {'piece': rook_move[0], 'move': rook_move}

    def get_moves(self):
        """Return the mainline as a list of ``(from_square, to_square)`` tuples."""
        return [(move.from_square, move.to_square)
                for move in self.game.mainline_moves()]

    @staticmethod
    def _to_uci(square):
        """Convert a square number (0-63) to its name, e.g. ``0 -> 'a1'``."""
        square = int(square)
        letter = chr(ord('a') + square % 8)
        number = square // 8 + 1
        return f"{letter}{number}"

    def start_tracker(self):
        """Replace each piece object in the tracker with a fresh record dict."""
        self.tracker = {
            square: {'piece': piece.unicode_symbol() + '-' + str(self._to_uci(square)),
                     'moves': [], 'last_square': square,
                     'captured': False, 'captures': []}
            for square, piece in self.tracker.items()
        }

    def fill_tracker(self):
        """Replay ``self.moves`` and record movements and captures in the tracker.

        Handles ordinary captures, en passant (the victim is not on the
        destination square) and castling (the rook is moved together with the
        king).  A promoted pawn keeps its pawn label.  Calling the method
        again after a successful run does nothing.

        Raises:
            IndexError: if a move starts from a square holding no uncaptured
                piece (same exception type the lookup raised before).
        """
        if self._tracker_filled:
            return

        # current square -> tracker key, for every piece still on the board
        occupancy = {entry['last_square']: key
                     for key, entry in self.tracker.items()
                     if entry['captured'] is False}

        for from_m, to_m in self.moves:
            if from_m not in occupancy:
                raise IndexError(
                    f"no uncaptured piece on square {from_m} for move {(from_m, to_m)}")
            piece = occupancy.pop(from_m)
            mover = self.tracker[piece]

            # Tracker keys may be 0 (a1), so always compare against None.
            victim = occupancy.get(to_m)
            mover_symbol = mover['piece'][0]
            if (victim is None and mover_symbol in self._PAWN_SYMBOLS
                    and from_m % 8 != to_m % 8):
                # A pawn changing file onto an empty square is en passant: the
                # victim stands on the mover's rank, on the destination file.
                passed_square = (from_m // 8) * 8 + to_m % 8
                candidate = occupancy.get(passed_square)
                if candidate is not None:
                    candidate_symbol = self.tracker[candidate]['piece'][0]
                    if (candidate_symbol in self._PAWN_SYMBOLS
                            and candidate_symbol != mover_symbol):
                        victim = candidate
                        del occupancy[passed_square]

            if victim is not None:
                self.tracker[victim]['captured'] = True
                mover['captures'].append(self.tracker[victim].get('piece'))

            mover['moves'].append(to_m)

            # Keys 4 and 60 are the kings when the game starts from the
            # standard position; a two-square king move is castling.
            if piece in (4, 60) and abs(from_m - to_m) == 2:
                castled = self.castling_move_rook(from_m, to_m)
                if castled is not None:
                    rook_from, rook_to = castled['move']
                    rook = self.tracker[castled['piece']]
                    rook['moves'].append(rook_to)
                    rook['last_square'] = rook_to
                    occupancy.pop(rook_from, None)
                    occupancy[rook_to] = castled['piece']

            mover['last_square'] = to_m
            occupancy[to_m] = piece

        self._tracker_filled = True

    def get_mean_elo(self):
        """Return the average of both players' Elo, or NaN if either is unusable."""
        try:
            return (int(self.black_elo) + int(self.white_elo)) / 2
        except (TypeError, ValueError):
            # A missing header gives None (TypeError); a non-numeric
            # rating string gives ValueError.
            return np.nan

    def get_dataframe(self):
        """Return the tracker as a DataFrame with one row per starting piece.

        The index is the piece's starting square number; ``game_id`` and
        ``mean_elo`` are added as constant columns.
        """
        df = pd.DataFrame.from_dict(self.tracker, orient='index')
        df['game_id'] = self.game_id
        df['mean_elo'] = self.get_mean_elo()

        return df

In [8]:
# Parse every game in the PGN dump into a Match; trackers are filled later.
games = []
# The parser works on text, so decoding is the caller's job. Pin the encoding
# rather than rely on the platform default (assumes the dump is UTF-8 — TODO confirm).
with open('data/lichess_db_standard_rated_2013-02.pgn', 'r', encoding='utf-8') as pgn_file:
    game = chess.pgn.read_game(pgn_file)
    # The loop ends when read_game yields None; identity is the correct test for None.
    while game is not None:
        games.append(Match(game))
        game = chess.pgn.read_game(pgn_file)

In [None]:
# Replay the first game's moves into its tracker; this mutates games[0] in place.
games[0].fill_tracker()

In [14]:
# Inspect the first game's tracker as a table: one row per starting piece,
# indexed by the piece's starting square number.
games[0].get_dataframe()

Unnamed: 0,piece,moves,last_square,captured,captures,game_id,mean_elo
63,♜-h8,[62],62,False,[],9tp6v4ps,1339.5
62,♞-g8,[45],45,True,[],9tp6v4ps,1339.5
61,♝-f8,[],61,False,[],9tp6v4ps,1339.5
60,♚-e8,[],60,False,[],9tp6v4ps,1339.5
59,♛-d8,"[43, 35, 36, 9]",9,True,"[♘-b1, ♙-b2]",9tp6v4ps,1339.5
58,♝-c8,[],58,False,[],9tp6v4ps,1339.5
57,♞-b8,"[42, 25, 35]",35,True,[♙-d2],9tp6v4ps,1339.5
56,♜-a8,[],56,False,[],9tp6v4ps,1339.5
55,♟-h7,"[47, 39]",39,False,[],9tp6v4ps,1339.5
54,♟-g7,[45],45,False,[♗-c1],9tp6v4ps,1339.5


In [15]:
# Fill the trackers of every game after the first, then stack all per-game
# tables (including the first game's) into a single frame.
for match in games[1:]:
    match.fill_tracker()
per_game_frames = [match.get_dataframe() for match in games]
data = pd.concat(per_game_frames)

In [16]:
# Display the combined per-piece table built from all games.
data

Unnamed: 0,piece,moves,last_square,captured,captures,game_id,mean_elo
63,♜-h8,[62],62,False,[],9tp6v4ps,1339.5
62,♞-g8,[45],45,True,[],9tp6v4ps,1339.5
61,♝-f8,[],61,False,[],9tp6v4ps,1339.5
60,♚-e8,[],60,False,[],9tp6v4ps,1339.5
59,♛-d8,"[43, 35, 36, 9]",9,True,"[♘-b1, ♙-b2]",9tp6v4ps,1339.5
...,...,...,...,...,...,...,...
4,♔-e1,"[6, 5, 14, 21, 30, 21, 30, 39, 38, 46, 54]",54,False,"[♟-e7, ♟-h7, ♛-d8]",d1dym437,1400.0
3,♕-d1,"[17, 9, 36, 44, 42, 33, 41, 20, 11, 38]",38,True,"[♝-f8, ♞-b8, ♟-c7, ♜-h8, ♟-g7]",d1dym437,1400.0
2,♗-c1,"[16, 2, 9]",9,True,[♞-g8],d1dym437,1400.0
1,♘-b1,[11],11,True,[],d1dym437,1400.0


In [17]:
# View the rows ordered by mean Elo, lowest first. sort_values returns a new
# frame, so `data` itself keeps its order; rows without a mean Elo come last.
data.sort_values(by='mean_elo')

Unnamed: 0,piece,moves,last_square,captured,captures,game_id,mean_elo
0,♖-a1,[],0,True,[],kxwtoj8o,921.0
63,♜-h8,[60],60,False,[],kxwtoj8o,921.0
62,♞-g8,[45],45,True,[],kxwtoj8o,921.0
61,♝-f8,"[25, 18]",18,True,[♘-b1],kxwtoj8o,921.0
60,♚-e8,[59],59,False,[],kxwtoj8o,921.0
...,...,...,...,...,...,...,...
4,♔-e1,"[2, 1, 10]",10,False,[♜-a8],wloomohl,
3,♕-d1,"[27, 45]",45,True,"[♞-b8, ♛-d8]",wloomohl,
2,♗-c1,[11],11,True,[],wloomohl,
1,♘-b1,"[18, 35]",35,True,[♟-c7],wloomohl,
