In [1]:
import chess.pgn
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
sns.set(style="white")
from collections import Counter
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go

In [125]:
## source: https://github.com/dsharpc/ChessNetworks/blob/master/data_parser/notebooks/Parser.ipynb
class Match:
    def __init__(self, game):
        self.game = game
        self.white = game.headers.get('White')
        self.black = game.headers.get('Black')
        self.game_id = game.headers.get('Site').split('/')[-1]
        self.moves = self.get_moves()
        self.tracker = game.board().piece_map()
        self.start_tracker()
        self.black_elo = game.headers.get('BlackElo')
        self.white_elo = game.headers.get('WhiteElo')
        self.opening = game.headers.get('Opening')
        self.eco = game.headers.get('ECO')
        self.termination = game.headers.get('Termination')
        self.date = game.headers.get('UTCDate')
        self.winner = self.get_winner(game.headers.get('Result'))
        self.checkmate = True if str(game.mainline_moves())[-1] == '#' else False
        self.mainline_moves = game.mainline_moves()
        self.castle_tracker = {'white':0, 'black':0}
        self.white_change = game.headers.get('WhiteRatingDiff') 
        self.black_change = game.headers.get('BlackRatingDiff')
    
    @staticmethod
    def get_winner(results):
        results = results.split('-')
        if results[0] == '1':
            return 'White'
        if results[0] == '1/2':
            return 'Tie'
        else:
            return 'Black'
        
    @staticmethod 
    def castling_move_rook(from_square, to_square):
        if from_square == 4:
            if to_square == 6:
                return {'piece':7, 'move':(7,5)}
            elif to_square == 2:
                return {'piece':0, 'move':(0,3)}
        elif from_square == 60:
            if to_square == 62:
                return {'piece':63, 'move':(63,61)}
            elif to_square == 58:
                return {'piece':56, 'move':(56,59)}
    
    def get_moves(self):
        moves = []
        for move in self.game.mainline_moves():
            from_m = move.from_square
            to_m = move.to_square
            moves.append((from_m, to_m))
        return moves
    
    @staticmethod
    def _to_uci(square):
        square = int(square)
        letter = chr(ord('a') + ((square)%8)) 
        number = square//8+1
        return f"{letter}{number}"
    
    def start_tracker(self):
        for key in self.tracker.keys():
            self.tracker[key] = {'piece': self.tracker[key].unicode_symbol()+'-'+str(self._to_uci(key)),\
                                 'moves':[], 'last_square':key,'captured':False, 'captures':[], 'move_nums':[]}
        
    def fill_tracker(self):
        for idx,mov in enumerate(self.moves):
            from_m, to_m = mov
            piece = [key for (key,value) in self.tracker.items() if value.get('last_square') == from_m and value.get('captured') is False][0]
            captured = [key for (key,value) in self.tracker.items() if value.get('last_square') == to_m and value.get('captured') is False]
            if len(captured) > 0:
                captured = captured[0]
                self.tracker[captured]['captured'] = True
                self.tracker[piece]['captures'].append(self.tracker[captured].get('piece'))
            self.tracker[piece]['moves'].append(to_m)
            if piece in [4,60] and abs(from_m - to_m) == 2:
                castled = self.castling_move_rook(from_m,to_m)
                self.tracker[castled['piece']]['moves'].append(castled['move'][1])
                self.tracker[castled['piece']]['last_square'] = castled['move'][1]
                self.castle_tracker['white' if piece == 4 else 'black'] += 1
            self.tracker[piece]['last_square'] = to_m
            self.tracker[piece]['move_nums'].append(idx)
        if self.checkmate == True:
            self.checkmate = self.tracker[piece]['piece']                
                
            
    def get_mean_elo(self):
        try:
            mean = (int(self.black_elo) + int(self.white_elo))/2
            return mean
        except:
            return np.nan
        
    def get_dataframe(self):
        df = pd.DataFrame.from_dict(self.tracker, orient='index')
        df['game_id'] = self.game_id
        df['mean_elo'] = self.get_mean_elo()
        df['Checkmate'] = self.checkmate
        df['Termination'] = self.termination
        df['white'] = self.white
        df['black'] = self.black
        
        return df

In [126]:
games = {}
limit = 5000 # for testing
with open('data/lichess_db_standard_rated_2013-02.pgn', 'r') as pgn_file:
    game = chess.pgn.read_game(pgn_file)
    L = 0
    while game != None:
        match = Match(game)
        idx = match.game_id
        games[idx] = match
        game = chess.pgn.read_game(pgn_file)
        L += 1
        if L == limit: break

In [127]:
for game in games.values():
    game.fill_tracker()
data = pd.concat([i.get_dataframe() for i in games.values()])

In [86]:
data[data['Checkmate'] != False]

Unnamed: 0,piece,moves,last_square,captured,captures,move_nums,game_id,mean_elo,Checkmate,Termination,white,black
63,♜-h8,[],63,False,[],[],b9tstv2z,1836.0,♕-d1,Normal,dvorak,Kiriush
62,♞-g8,"[45, 39, 45, 28]",28,False,[♘-b1],"[7, 27, 39, 41]",b9tstv2z,1836.0,♕-d1,Normal,dvorak,Kiriush
61,♝-f8,[52],52,False,[],[21],b9tstv2z,1836.0,♕-d1,Normal,dvorak,Kiriush
60,♚-e8,"[61, 54]",54,False,[],"[31, 37]",b9tstv2z,1836.0,♕-d1,Normal,dvorak,Kiriush
59,♛-d8,[],59,False,[],[],b9tstv2z,1836.0,♕-d1,Normal,dvorak,Kiriush
...,...,...,...,...,...,...,...,...,...,...,...,...
4,♔-e1,"[6, 13]",13,False,[],"[40, 74]",huknxzib,1320.5,♕-d1,Normal,HonchoIII,-ManOwaR-
3,♕-d1,"[19, 40, 24, 33, 40, 33, 49, 58, 63, 47, 55, 1...",10,False,"[♟-a7, ♞-g8]","[22, 24, 28, 34, 62, 64, 66, 76, 78, 80, 82, 8...",huknxzib,1320.5,♕-d1,Normal,HonchoIII,-ManOwaR-
2,♗-c1,"[20, 11, 29, 38]",38,True,[♟-g7],"[10, 38, 42, 44]",huknxzib,1320.5,♕-d1,Normal,HonchoIII,-ManOwaR-
1,♘-b1,"[18, 33, 18, 33]",33,True,[♟-b7],"[2, 26, 30, 32]",huknxzib,1320.5,♕-d1,Normal,HonchoIII,-ManOwaR-


In [6]:
data.sort_values(by='mean_elo')

Unnamed: 0,piece,moves,last_square,captured,captures,move_nums,game_id,mean_elo,Checkmate,Termination,white,black
63,♜-h8,[],63,False,[],[],hcrm1twx,1198.5,False,Normal,kualalumpur,MiquelLHuma
0,♖-a1,[],0,False,[],[],hcrm1twx,1198.5,False,Normal,kualalumpur,MiquelLHuma
1,♘-b1,"[11, 5]",5,False,[],"[14, 28]",hcrm1twx,1198.5,False,Normal,kualalumpur,MiquelLHuma
2,♗-c1,[],2,False,[],[],hcrm1twx,1198.5,False,Normal,kualalumpur,MiquelLHuma
3,♕-d1,"[19, 17, 24, 32, 34, 48]",48,False,"[♝-f8, ♟-b7, ♟-a7]","[12, 16, 22, 24, 32, 34]",hcrm1twx,1198.5,False,Normal,kualalumpur,MiquelLHuma
...,...,...,...,...,...,...,...,...,...,...,...,...
4,♔-e1,"[3, 10, 17]",17,False,[],"[26, 42, 46]",oiq70mvu,,False,Time forfeit,?,Jordeniel01
3,♕-d1,[11],11,True,[],[24],oiq70mvu,,False,Time forfeit,?,Jordeniel01
2,♗-c1,[],2,True,[],[],oiq70mvu,,False,Time forfeit,?,Jordeniel01
1,♘-b1,[16],16,True,[],[20],oiq70mvu,,False,Time forfeit,?,Jordeniel01


In [7]:
def first_winning_moves(games):
    first_moves = {'White':Counter(), 'Black':Counter()}
    for idx,match in games.items():
        winner = match.winner
        if winner == 'Tie': continue
        white = match.get_moves()[0]
        black = match.get_moves()[1]
        first_moves[winner].update([white if winner == 'White' else black])
    return first_moves

In [8]:
x = first_winning_moves(games)

In [9]:
def first_moves(games):
    first_moves = {'White':Counter(), 'Black':Counter()}
    for idx,match in games.items():
        white = match.get_moves()[0]
        black = match.get_moves()[1]
        first_moves['White'].update([white])
        first_moves['Black'].update([black])
    return first_moves

In [10]:
y = first_moves(games)

In [11]:
for color,counter in x.items():
    print(f'{color} - {counter.most_common()}')
    print(f'{color} - {y[color].most_common()}')

White - [((12, 28), 151), ((11, 27), 71), ((6, 21), 16), ((11, 19), 8), ((13, 29), 7), ((10, 26), 5), ((12, 20), 3), ((9, 17), 3), ((10, 18), 3), ((14, 22), 1)]
White - [((12, 28), 269), ((11, 27), 134), ((6, 21), 29), ((10, 26), 17), ((11, 19), 13), ((13, 29), 11), ((9, 17), 6), ((10, 18), 6), ((12, 20), 5), ((14, 22), 4), ((9, 25), 3), ((14, 30), 1), ((1, 18), 1), ((8, 16), 1)]
Black - [((52, 36), 77), ((51, 35), 57), ((62, 45), 22), ((50, 34), 21), ((52, 44), 12), ((49, 41), 9), ((51, 43), 8), ((54, 46), 6), ((57, 42), 3), ((50, 42), 3), ((53, 37), 2), ((49, 33), 1)]
Black - [((52, 36), 172), ((51, 35), 116), ((50, 34), 50), ((62, 45), 45), ((52, 44), 37), ((51, 43), 17), ((54, 46), 16), ((49, 41), 16), ((50, 42), 12), ((57, 42), 11), ((53, 37), 4), ((48, 32), 1), ((48, 40), 1), ((62, 47), 1), ((49, 33), 1)]


In [12]:
def move_win_rate_per_move(winners,moves):
    white_win_rates = {}
    black_win_rates = {}
    for i in moves['White'].keys():
        if i in winners['White']:
            white_win_rates[i] = winners['White'][i]/moves['White'][i]
        else:
            white_win_rates[i] = 0
    for i in moves['Black'].keys():
        if i in winners['Black']:
            black_win_rates[i] = winners['Black'][i]/moves['Black'][i]
        else:
            black_win_rates[i] = 0
    return white_win_rates,black_win_rates

In [13]:
move_win_rate_per_move(x,y)

({(12, 28): 0.5613382899628253,
  (11, 19): 0.6153846153846154,
  (11, 27): 0.5298507462686567,
  (9, 17): 0.5,
  (14, 30): 0,
  (10, 26): 0.29411764705882354,
  (9, 25): 0,
  (6, 21): 0.5517241379310345,
  (1, 18): 0,
  (12, 20): 0.6,
  (13, 29): 0.6363636363636364,
  (14, 22): 0.25,
  (10, 18): 0.5,
  (8, 16): 0},
 {(51, 35): 0.49137931034482757,
  (52, 36): 0.4476744186046512,
  (50, 34): 0.42,
  (62, 45): 0.4888888888888889,
  (52, 44): 0.32432432432432434,
  (54, 46): 0.375,
  (51, 43): 0.47058823529411764,
  (50, 42): 0.25,
  (49, 41): 0.5625,
  (57, 42): 0.2727272727272727,
  (53, 37): 0.5,
  (48, 32): 0,
  (48, 40): 0,
  (62, 47): 0,
  (49, 33): 1.0})

In [14]:
def to_uci(square):
        square = int(square)
        letter = chr(ord('a') + ((square)%8)) 
        number = square//8+1
        return f"{letter}{number}"

In [15]:
def move_win_rate(winners,moves):
    white_win_rates = {}
    black_win_rates = {}
    white_wins = sum(winners['White'].values())
    white_moves = sum(moves['White'].values())
    black_wins = sum(winners['Black'].values())
    black_moves = sum(moves['Black'].values())
    for i in moves['White'].keys():
        legible_i = (to_uci(i[0]), to_uci(i[1]))
        if i in winners['White']:
            white_win_rates[legible_i] = (round(winners['White'][i]/moves['White'][i], 3), round(moves['White'][i]/white_moves, 3))
        else:
            white_win_rates[legible_i] = (0, round(moves['White'][i]/white_moves, 3))
    for i in moves['Black'].keys():
        legible_i = (to_uci(i[0]), to_uci(i[1]))
        if i in winners['Black']:
            black_win_rates[legible_i] = (round(winners['Black'][i]/moves['Black'][i], 3), round(moves['Black'][i]/black_moves, 3))
        else:
            black_win_rates[legible_i] = (0, round(moves['Black'][i]/black_moves, 3))
    return white_win_rates,black_win_rates

In [16]:
move_win_rate(x,y)

({('e2', 'e4'): (0.561, 0.538),
  ('d2', 'd3'): (0.615, 0.026),
  ('d2', 'd4'): (0.53, 0.268),
  ('b2', 'b3'): (0.5, 0.012),
  ('g2', 'g4'): (0, 0.002),
  ('c2', 'c4'): (0.294, 0.034),
  ('b2', 'b4'): (0, 0.006),
  ('g1', 'f3'): (0.552, 0.058),
  ('b1', 'c3'): (0, 0.002),
  ('e2', 'e3'): (0.6, 0.01),
  ('f2', 'f4'): (0.636, 0.022),
  ('g2', 'g3'): (0.25, 0.008),
  ('c2', 'c3'): (0.5, 0.012),
  ('a2', 'a3'): (0, 0.002)},
 {('d7', 'd5'): (0.491, 0.232),
  ('e7', 'e5'): (0.448, 0.344),
  ('c7', 'c5'): (0.42, 0.1),
  ('g8', 'f6'): (0.489, 0.09),
  ('e7', 'e6'): (0.324, 0.074),
  ('g7', 'g6'): (0.375, 0.032),
  ('d7', 'd6'): (0.471, 0.034),
  ('c7', 'c6'): (0.25, 0.024),
  ('b7', 'b6'): (0.562, 0.032),
  ('b8', 'c6'): (0.273, 0.022),
  ('f7', 'f5'): (0.5, 0.008),
  ('a7', 'a5'): (0, 0.002),
  ('a7', 'a6'): (0, 0.002),
  ('g8', 'h6'): (0, 0.002),
  ('b7', 'b5'): (1.0, 0.002)})

In [17]:
terminations = Counter()
for key,value in games.items():
    terminations[value.termination] += 1
terminations

Counter({'Time forfeit': 173, 'Normal': 327})

## captures

In [18]:
captures_df = (data.explode('captures').groupby(['piece','captures'])['game_id'].nunique()).to_frame().reset_index().sort_values('game_id', ascending=False).assign(piece_type = lambda df: df['piece'].str.split('-').str.get(0))

In [19]:
captures_df['captured_piece_type'] = captures_df['captures'].apply(lambda x: x[0])

In [20]:
new_df = captures_df[['piece_type', 'captured_piece_type']].drop_duplicates()

In [21]:
new_df['capture_count'] = new_df.apply(lambda x: sum(captures_df[(captures_df['piece_type'] == x['piece_type']) & (captures_df['captured_piece_type'] == x['captured_piece_type'])]['game_id']), axis=1)

In [22]:
new_df['color'] = new_df['piece_type'].apply(lambda x: 'white' if ord(x) >= 9812 or ord(x) <= 9817 else 'black')

In [23]:
px.bar(new_df, x='piece_type', y='capture_count', color='captured_piece_type').write_html('captures_by_piece.html')

In [24]:
new_df.to_csv('captures_df.csv')

## moves per piece

In [25]:
def moves_per_piece(data, min_elo=1000, max_elo=1500):
    def piece_normalizer(row):
        piece = row['piece']
        if piece in  ['♟', '♙']:
            return row['number_of_moves']/8
        if piece in  ['♕', '♚', '♛', '♔']:
            return row['number_of_moves']
        else:
            return row['number_of_moves']/2

    mpr = data[['piece', 'move_nums', 'mean_elo']].copy()
    mpr['number_of_moves'] = mpr['move_nums'].apply(lambda x: len(x))
    mpr['piece'] = mpr['piece'].apply(lambda x: x[0])
    mpr = mpr[(mpr['mean_elo'] > min_elo) & (mpr['mean_elo'] < max_elo)].groupby('piece').sum().reset_index()
    mpr['normalized_moves'] = mpr.apply(piece_normalizer, axis=1)
    mpr['normalized_moves'] = mpr['normalized_moves']/mpr['normalized_moves'].sum()*100
    return px.bar(data_frame=mpr, x='piece', y='normalized_moves')

In [26]:
moves_per_piece(data).write_html('moves_per_piece.html')

## most common checkmates

In [27]:
def decolorizer(piece):
    piece = piece[0]
    if piece in ['♕', '♛']:
        return 'Queen'
    if piece in ['♟', '♙']:
        return 'Pawn'
    if piece in ['♖', '♜']:
        return 'Rook'
    if piece in ['♘', '♞']:
        return 'Knight'
    if piece in ['♗', '♝']:
        return 'Bishop'
    
def checkmates(df, min_elo, max_elo):
    cm = df[df['Checkmate'] != False].copy()
    cm['Checkmate'] = cm['Checkmate'].apply(decolorizer)
    cm = cm[['game_id', 'mean_elo', 'Checkmate']].drop_duplicates().reset_index(drop=True)
    cm = cm[(cm['mean_elo'] < max_elo) & (cm['mean_elo'] > min_elo)]
    return cm

In [28]:
px.histogram(data_frame=checkmates(data, 1500, 1600), x='Checkmate').write_html('checkmates.html') # dash will have slider for mean elo

## openings by elo

In [29]:
def openings_by_elo(data, eco='All', min_elo=1000, max_elo=1500):
    obe = data[['game_id', 'mean_elo']].copy()
    obe.drop_duplicates(inplace=True)
    obe['eco'] = obe['game_id'].apply(lambda x: games[x].eco)
    obe['open'] = obe['eco'].apply(lambda x: x[0])
    opening_name = {'A': 'Flank Openings', 'B': 'Semi-Open Games', 'C':'Open Games', 'D':'Closed Games and Semi-Closed Games', 'E':'Indian Defences'}
    obe['open name'] = obe['open'].apply(lambda x: opening_name[x])
    if eco == 'All':
        return px.pie(obe[(obe['mean_elo'] > min_elo) & (obe['mean_elo'] < max_elo)], 'open name')
    else:
        obe = obe[obe['open'] == eco]
        return px.pie(obe[(obe['mean_elo'] > min_elo) & (obe['mean_elo'] < max_elo)], 'eco')

In [30]:
openings_by_elo(data, 'A', 500, 1300).write_html('openings by elo.html')

## white/black diff by win

In [31]:
def white_black_rating_diff(data, color, trendline='lowess', min_elo=0, max_elo=5000):
    rating_diff_win_rate = data[['game_id']].copy()
    rating_diff_win_rate['white elo'] = rating_diff_win_rate['game_id'].apply(lambda x: int(games[x].white_elo) if games[x].white_elo != '?' else '?')
    rating_diff_win_rate['black elo'] = rating_diff_win_rate['game_id'].apply(lambda x: int(games[x].black_elo) if games[x].black_elo != '?' else '?')
    rating_diff_win_rate = rating_diff_win_rate[(rating_diff_win_rate['white elo'] != '?') & (rating_diff_win_rate['black elo'] != '?')]
    rating_diff_win_rate['winner'] = rating_diff_win_rate['game_id'].apply(lambda x: games[x].winner)
    rating_diff_win_rate['white-black diff'] = rating_diff_win_rate['white elo'] - rating_diff_win_rate['black elo']
    rating_diff_win_rate.drop_duplicates(inplace=True)
    rating_diff_win_rate.reset_index(inplace=True, drop=True)
    if color == 'white':
        col = 'white elo'
    else:
        col = 'black elo'
   
    rating_diff_win_rate = rating_diff_win_rate[(rating_diff_win_rate[col] >= min_elo) & (rating_diff_win_rate[col] <= max_elo)]
    c = 'winner'
    if rating_diff_win_rate.empty:
        c=None
    return px.scatter(rating_diff_win_rate, col, 'white-black diff', color=c, trendline=trendline)

In [32]:
white_black_rating_diff(data,'white', min_elo = 5000).write_html('white-black diff.html')

## some player stats

In [33]:
def average_time_per_move(data, games, min_elo, max_elo):
    data = data[['game_id', 'mean_elo']]
    data['num_moves'] = data['game_id'].apply(lambda x: games)

## first move, second move, win rate

In [77]:
openings_dict = {'white first move':[], 'white second move':[], 'black first move':[], 'black second move':[], 'white':[], 'black':[], 'tie':[]}
for game in games.values():
    move_counter = 0
    moves = []
    for move in game.mainline_moves:
        moves.append(str(move))
        if move_counter == 3:
            break
        move_counter += 1
    if len(moves) == 4:
        openings_dict['white first move'].append(moves[0])
        openings_dict['black first move'].append(moves[1])
        openings_dict['white second move'].append(moves[2])
        openings_dict['black second move'].append(moves[3])
        if game.winner == 'White':
            openings_dict['white'].append(1)
            openings_dict['black'].append(0)
            openings_dict['tie'].append(0)
        elif game.winner == 'Black':
            openings_dict['white'].append(0)
            openings_dict['black'].append(1)
            openings_dict['tie'].append(0)
        else:
            openings_dict['white'].append(0)
            openings_dict['black'].append(0)
            openings_dict['tie'].append(1)
four_moves = pd.DataFrame(openings_dict)
four_moves

Unnamed: 0,white first move,white second move,black first move,black second move,white,black,tie
0,e2e4,e4d5,d7d5,e7e6,1,0,0
1,e2e4,g1f3,e7e5,b8c6,1,0,0
2,e2e4,g1f3,e7e5,d7d6,1,0,0
3,e2e4,e4d5,d7d5,d8d5,1,0,0
4,d2d4,c2c4,g8f6,g7g6,0,1,0
...,...,...,...,...,...,...,...
493,g1f3,d2d4,g8f6,e7e6,0,1,0
494,d2d4,c2c4,d7d5,c7c5,1,0,0
495,d2d4,c2c4,b8c6,c6a5,1,0,0
496,g2g3,g1f3,d7d5,c7c6,1,0,0


In [36]:
def first_four_moves(four_moves, white_first = None, black_first = None, white_second = None):
    if white_first == None:
        return px.bar(four_moves, 'white first move', color='winner').update_traces(hovertemplate=None, hoverinfo='skip')
    if black_first == None:
        return px.bar(four_moves[four_moves['white first move'] == white_first], 'black first move', color='winner').update_traces(hovertemplate=None, hoverinfo='skip')
    if white_second == None:
        four_moves = four_moves[(four_moves['white first move'] == white_first) & (four_moves['black first move'] == black_first)]
        return px.bar(four_moves, 'white second move', color='winner').update_traces(hovertemplate=None, hoverinfo='skip')
    else:
        four_moves = four_moves[(four_moves['white first move'] == white_first) & (four_moves['black first move'] == black_first) & (four_moves['white second move'] == white_second)]
        return px.bar(four_moves, 'black_second_move', color='winner').update_traces(hovertemplate=None, hoverinfo='skip')

In [40]:
first_four_moves(four_moves).write_html('first_four_moves.html')

In [80]:
four_moves = four_moves[four_moves['white first move'] == 'e2e4'][['black first move', 'white', 'black', 'tie']].copy()
four_moves['white'] = four_moves.groupby(['black first move'])['white'].transform('sum')
four_moves['black'] = four_moves.groupby(['black first move'])['black'].transform('sum')
four_moves['tie'] = four_moves.groupby(['black first move'])['tie'].transform('sum')
four_moves.drop_duplicates()

Unnamed: 0,black first move,white,black,tie
0,d7d5,10,8,0
1,e7e5,81,62,3
6,c7c5,22,18,1
13,e7e6,14,4,0
14,c7c6,6,3,1
16,d7d6,3,6,0
28,g8f6,0,3,0
42,g7g6,3,1,1
68,b8c6,3,3,0
204,f7f5,1,0,0


In [79]:
fig = go.Figure(data = [
    go.Bar(name='White', x=tmp['black first move'], y=tmp['white'], text=tmp['white'], textposition='auto', hovertemplate="White wins: %{y}"),
    go.Bar(name='Black', x=tmp['black first move'], y=tmp['black'], text=tmp['black'], textposition='auto', hovertemplate="Black wins: %{y}"),
    go.Bar(name='Tie', x=tmp['black first move'], y=tmp['tie'], text=tmp['tie'], textposition='auto', hovertemplate="Tie games: %{y}")
])
fig.update_layout(barmode='stack', xaxis={'categoryorder':'total descending'})
fig.write_html('four_moves_test.html')

In [82]:
tmp.columns[0]

'black first move'

## classifying winers by white/black diff and predicting rating change

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [160]:
rating_diff_win_rate = data[['game_id']].copy()
rating_diff_win_rate['white change'] = rating_diff_win_rate['game_id'].apply(lambda x: int(games[x].white_change[1:]) if games[x].white_change else -100)
rating_diff_win_rate['black change'] = rating_diff_win_rate['game_id'].apply(lambda x: int(games[x].black_change[1:]) if games[x].black_change else -100)
rating_diff_win_rate = rating_diff_win_rate[rating_diff_win_rate['white change'] != -100]
rating_diff_win_rate = rating_diff_win_rate[rating_diff_win_rate['black change'] != -100]
rating_diff_win_rate['white elo'] = rating_diff_win_rate['game_id'].apply(lambda x: int(games[x].white_elo) if games[x].white_elo != '?' else '?')
rating_diff_win_rate['black elo'] = rating_diff_win_rate['game_id'].apply(lambda x: int(games[x].black_elo) if games[x].black_elo != '?' else '?')
rating_diff_win_rate = rating_diff_win_rate[(rating_diff_win_rate['white elo'] != '?') & (rating_diff_win_rate['black elo'] != '?')]
rating_diff_win_rate['winner'] = rating_diff_win_rate['game_id'].apply(lambda x: games[x].winner)
rating_diff_win_rate['white-black diff'] = rating_diff_win_rate['white elo'] - rating_diff_win_rate['black elo']
rating_diff_win_rate.drop_duplicates(inplace=True)
rating_diff_win_rate.reset_index(inplace=True, drop=True)

In [161]:
rating_diff_win_rate = rating_diff_win_rate[rating_diff_win_rate['winner'] != 'Tie']

In [162]:
rating_diff_win_rate['winner'].value_counts()

White    2614
Black    2225
Name: winner, dtype: int64

In [175]:
X_train, X_test, y_train, y_test = train_test_split(rating_diff_win_rate, rating_diff_win_rate, test_size=0.2, random_state=42)

In [176]:
model = LogisticRegression()

In [178]:
model.fit(X_train[['white-black diff']], y_train['winner'])

LogisticRegression()

In [180]:
model.score(X_test[['white-black diff']], y_test['winner'])

0.6776859504132231

In [167]:
model.coef_

array([[0.00446623]])

In [168]:
from sklearn.ensemble import RandomForestClassifier

In [181]:
model2 = RandomForestClassifier()
model.fit(X_train[['white-black diff']], y_train['winner'])
model.score(X_test[['white-black diff']], y_test['winner'])

0.6776859504132231

In [170]:
from sklearn.linear_model import LinearRegression

In [194]:
model3 = LinearRegression()
model3.fit(rating_diff_win_rate[rating_diff_win_rate['winner'] == 'White'][['white elo', 'black elo']], rating_diff_win_rate[rating_diff_win_rate['winner'] == 'White']['white change'])
#model3.score(X_test[X_test['winner'] == 'White'][['white elo', 'black elo']], y_test[y_test['winner'] == 'White']['white change'])

LinearRegression()

In [195]:
model3.coef_

array([-0.06411381,  0.04412775])

In [196]:
model3.intercept_

54.69199281300327

In [197]:
model3.predict([[1796, 1876]])

array([22.32724617])

In [191]:
rating_diff_win_rate

Unnamed: 0,game_id,white change,black change,white elo,black elo,winner,white-black diff
0,9tp6v4ps,5,20,1452,1227,White,225
1,b9tstv2z,13,14,1796,1876,White,-80
2,azjm2dih,8,8,1650,1534,White,116
3,yfe3lngr,7,6,1807,1637,White,170
4,xslxnflt,11,15,1457,1442,White,15
...,...,...,...,...,...,...,...
4981,qjmco8g3,29,5,1232,1492,Black,-260
4982,cw7mr4ek,4,6,1592,1874,Black,-282
4983,zfh3vf8w,5,6,1602,1862,Black,-260
4984,99uh9xqz,4,5,1674,1926,Black,-252
