In [1]:
import chess
import chess.pgn
from elo import Elo
import glob
import numpy as np
import pandas as pd

In [2]:
# Hypothesis: these rapid games produce fewer draws than over-the-board (OTB) games.
# I assume the parameters used in the Elo library were calibrated for OTB games, so
# here I try to fit new values for White's advantage (presumably higher than .1)
# and for the draw "advantage" (presumably lower than .6).

In [3]:
def read_games(handle):
    """Yield successive games parsed from an open PGN file handle.

    Calls ``chess.pgn.read_game(handle)`` repeatedly and yields each result
    until that call returns ``None``, at which point the generator finishes.
    """
    current = chess.pgn.read_game(handle)
    while current is not None:
        yield current
        current = chess.pgn.read_game(handle)

In [4]:
def _rated_game_headers(pattern):
    """Yield the headers of every game that carries both rating fields.

    Parameters
    ----------
    pattern : str
        Glob pattern (expanded with ``recursive=True``) selecting the PGN files.

    Yields
    ------
    The ``headers`` object of each game whose headers contain both
    ``'WhiteElo'`` and ``'BlackElo'``.
    """
    for path in glob.glob(pattern, recursive=True):
        # Open each file in a `with` block so its handle is closed once the
        # file has been read, instead of lingering until garbage collection.
        with open(path) as handle:
            for game in read_games(handle):
                if 'WhiteElo' in game.headers and 'BlackElo' in game.headers:
                    yield game.headers


# One row per rated game; the columns are the PGN header tags.
games = pd.DataFrame(_rated_game_headers('games/**/*.pgn'))

In [5]:
# Preview the first rows of the collected game headers.
games.head()

Unnamed: 0,Event,Site,Date,Round,White,Black,Result,WhiteTitle,BlackTitle,WhiteElo,BlackElo,ECO,Opening,Variation,WhiteTeam,BlackTeam,WhiteFideId,BlackFideId,EventDate
0,PRO League Group Stage,chess.com INT,2018.01.18,1,"Puranik, Abhimanyu","Gupta, Abhijeet",0-1,GM,GM,2520,2610,C07,French,"Tarrasch, Eliskases variation",Mumbai Movers,Delhi Dynamite,5061245,5010608,2018.01.18
1,PRO League Group Stage,chess.com INT,2018.01.18,1,"Puranik, Abhimanyu","Aradhya, Garg",1-0,GM,,2520,2289,C72,Ruy Lopez,"modern Steinitz defence, 5.O-O",Mumbai Movers,Delhi Dynamite,5061245,25006746,2018.01.18
2,PRO League Group Stage,chess.com INT,2018.01.18,1,"Gupta, Abhijeet","Kunte, Abhijit",1/2-1/2,GM,GM,2610,2499,A15,English opening,,Delhi Dynamite,Mumbai Movers,5010608,5002265,2018.01.18
3,PRO League Group Stage,chess.com INT,2018.01.18,1,"Gupta, Abhijeet","Ghosh, Diptayan",1-0,GM,GM,2610,2556,A05,Reti,King's Indian attack,Delhi Dynamite,Mumbai Movers,5010608,5045207,2018.01.18
4,PRO League Group Stage,chess.com INT,2018.01.18,1,"Kunte, Abhijit","Lalith, Babu M R",1/2-1/2,GM,GM,2499,2542,A14,English,Neo-Catalan declined,Mumbai Movers,Delhi Dynamite,5002265,5024595,2018.01.18


In [6]:
def g(x):
    """One-hot encode the result of a single game.

    Parameters
    ----------
    x : mapping or Series
        Must provide a ``'Result'`` entry holding the PGN result string.

    Returns
    -------
    pandas.Series
        Entries ``'white'``, ``'black'`` and ``'draw'``; the one whose result
        code equals ``x['Result']`` is 1 and the others are 0. Any other
        result string gives all zeros.
    """
    outcome = x['Result']
    result_codes = {'white': '1-0', 'black': '0-1', 'draw': '1/2-1/2'}
    return pd.Series({label: int(outcome == code)
                      for label, code in result_codes.items()})

# Observed outcomes: one row per game, one 0/1 indicator column per result.
obs = games.apply(g, axis='columns')

In [7]:
# Preview the 0/1 outcome indicators built by applying g to each game.
obs.head()

Unnamed: 0,black,draw,white
0,1,0,0
1,0,0,1
2,0,1,0
3,0,0,1
4,0,1,0


In [8]:
# Print every header field of the game stored under row label 0.
print(games.loc[0])

Event                 PRO League Group Stage
Site                           chess.com INT
Date                              2018.01.18
Round                                      1
White                     Puranik, Abhimanyu
Black                        Gupta, Abhijeet
Result                                   0-1
WhiteTitle                                GM
BlackTitle                                GM
WhiteElo                                2520
BlackElo                                2610
ECO                                      C07
Opening                               French
Variation      Tarrasch, Eliskases variation
WhiteTeam                      Mumbai Movers
BlackTeam                     Delhi Dynamite
WhiteFideId                          5061245
BlackFideId                          5010608
EventDate                         2018.01.18
Name: 0, dtype: object


In [9]:
import scipy.optimize

In [10]:
def fun(x):
    """Sum of squared differences between Elo predictions and observed results.

    Reads the module-level frames ``games`` (needs ``'WhiteElo'`` and
    ``'BlackElo'`` columns convertible with ``int``) and ``obs``.

    Parameters
    ----------
    x : sequence of two floats
        ``x[0]`` is passed to ``Elo`` as the white advantage and ``x[1]`` as
        the draw-odds advantage.

    Returns
    -------
    float
        ``(predicted - observed) ** 2`` summed over all games and all
        outcome columns.
    """
    white_advantage, draw_odds_advantage = x[0], x[1]
    elo = Elo(white_advantage, draw_odds_advantage)

    # Build the prediction table once from a list of per-game results instead
    # of creating a pandas Series per row through DataFrame.apply. The rows
    # reuse games' index so the subtraction below lines up with obs.
    # NOTE(review): assumes elo.probabilities returns a mapping/Series labelled
    # like the columns of obs -- confirm against the elo module.
    elo_prob = pd.DataFrame(
        [elo.probabilities(int(white_elo), int(black_elo))
         for white_elo, black_elo in zip(games['WhiteElo'], games['BlackElo'])],
        index=games.index)

    # NOTE(review): plain least squares pooled over the outcome columns; the
    # original author flagged uncertainty about whether this loss is
    # appropriate for several outcome series at once.
    return ((elo_prob - obs)**2).sum().sum()

In [11]:
# Objective value at [.1, .6], the same point the optimisers are started from.
fun([.1, .6])

1071.2502445239247

In [12]:
%%time
# Minimise `fun` over (white advantage, draw-odds advantage), starting from
# [.1, .6] with each parameter boxed to [0, 1]. No `method` is given, so SciPy
# picks its bounded default solver.
# NOTE(review): the recorded run reports a very large first `jac` component at
# the returned point, so `success: True` alone does not show a stationary
# point was reached -- compare against the least_squares fit.
res = scipy.optimize.minimize(fun, [.1, .6], bounds=[(0, 1), (0, 1)])

CPU times: user 1min 15s, sys: 225 ms, total: 1min 15s
Wall time: 1min 16s


In [13]:
# Display the full result object returned by scipy.optimize.minimize.
res

      fun: 1029.461683341789
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 105367.26149439,     160.9547553 ])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 132
      nit: 6
   status: 0
  success: True
        x: array([ 0.33542897,  0.46681261])

In [14]:
# Keep the two parameters found by `minimize` under readable names.
white_advantage, draw_odds_advantage = res.x[0], res.x[1]

In [15]:
def gun(x):
    """Residuals between Elo predictions and observed results, as one vector.

    Reads the module-level frames ``games`` (needs ``'WhiteElo'`` and
    ``'BlackElo'`` columns convertible with ``int``) and ``obs``.

    Parameters
    ----------
    x : sequence of two floats
        ``x[0]`` is passed to ``Elo`` as the white advantage and ``x[1]`` as
        the draw-odds advantage.

    Returns
    -------
    pandas.Series
        ``predicted - observed`` for every game and outcome column, flattened
        with ``DataFrame.unstack`` so it is indexed by (column, row).
    """
    white_advantage, draw_odds_advantage = x[0], x[1]
    elo = Elo(white_advantage, draw_odds_advantage)

    # Build the prediction table once from a list of per-game results instead
    # of creating a pandas Series per row through DataFrame.apply. The rows
    # reuse games' index so the subtraction below lines up with obs.
    # NOTE(review): assumes elo.probabilities returns a mapping/Series labelled
    # like the columns of obs -- confirm against the elo module.
    elo_prob = pd.DataFrame(
        [elo.probabilities(int(white_elo), int(black_elo))
         for white_elo, black_elo in zip(games['WhiteElo'], games['BlackElo'])],
        index=games.index)

    return (elo_prob - obs).unstack()

In [16]:
# Fit the same two parameters from the same start point [.1, .6], this time on
# the residual vector from `gun`. least_squares takes bounds as
# (lower_bounds, upper_bounds), so [[0, 0], [1, 1]] boxes both parameters to
# [0, 1]. The result is only displayed here, not stored in a variable.
scipy.optimize.least_squares(gun, [.1, .6], bounds=[[0, 0], [1, 1]])

 active_mask: array([0, 0])
        cost: 505.65877455077737
         fun: array([-0.53875311,  0.11273378,  0.1956488 , ..., -0.65434695,
       -0.65953781,  0.18224532])
        grad: array([ 0.72038697, -0.16727856])
         jac: array([[ 0.02463602, -0.36092618],
       [ 0.01978862, -0.20764379],
       [ 0.02527049, -0.33983159],
       ..., 
       [ 0.02144431, -0.4179398 ],
       [ 0.02199885, -0.41566149],
       [ 0.0210231 , -0.24428105]])
     message: '`ftol` termination condition is satisfied.'
        nfev: 19
        njev: 8
  optimality: 0.11281331279847739
      status: 2
     success: True
           x: array([ 0.08616573,  0.3255961 ])