In [45]:
import chess
import chess.pgn
from elo import Elo
import glob
import numpy as np
import pandas as pd

In [34]:
# Hypothesis: these rapid games produce fewer draws than over-the-board (OTB) games.
# I assume that the parameters used in the Elo library have been calibrated for OTB
# games. So, here I try to fit new values for the white advantage (presumably higher
# than .1) and for the draw "advantage" (presumably lower than .6).

In [35]:
def read_games(handle):
    """Yield the games read one after another from an open PGN handle.

    Each game is obtained with chess.pgn.read_game(handle); iteration ends
    the first time that call returns None.
    """
    game = chess.pgn.read_game(handle)
    while game is not None:
        yield game
        # Fetch the next game only once the consumer asks for it.
        game = chess.pgn.read_game(handle)

In [36]:
def iter_rated_headers(pattern):
    """Yield the headers of every game that carries both player ratings.

    Parameters
    ----------
    pattern : str
        Glob pattern (expanded with recursive=True) selecting the PGN files.

    Yields
    ------
    The ``headers`` object of each game returned by read_games whose headers
    contain both 'WhiteElo' and 'BlackElo'.
    """
    for path in glob.glob(pattern, recursive=True):
        # Open each file in a `with` block so its handle is closed as soon as
        # all of its games have been read, instead of leaking one open handle
        # per PGN file.
        with open(path) as handle:
            for game in read_games(handle):
                if 'WhiteElo' in game.headers and 'BlackElo' in game.headers:
                    yield game.headers


# One row per rated game; the columns are the PGN header tags.
games = pd.DataFrame(iter_rated_headers('games/**/*.pgn'))

In [37]:
games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2257 entries, 0 to 2256
Data columns (total 19 columns):
Event          2257 non-null object
Site           2257 non-null object
Date           2257 non-null object
Round          2257 non-null object
White          2257 non-null object
Black          2257 non-null object
Result         2257 non-null object
WhiteTitle     2045 non-null object
BlackTitle     2052 non-null object
WhiteElo       2257 non-null object
BlackElo       2257 non-null object
ECO            2257 non-null object
Opening        2257 non-null object
Variation      1443 non-null object
WhiteTeam      2257 non-null object
BlackTeam      2257 non-null object
WhiteFideId    2257 non-null object
BlackFideId    2257 non-null object
EventDate      2257 non-null object
dtypes: object(19)
memory usage: 335.1+ KB


In [54]:
games.head()

Unnamed: 0,Event,Site,Date,Round,White,Black,Result,WhiteTitle,BlackTitle,WhiteElo,BlackElo,ECO,Opening,Variation,WhiteTeam,BlackTeam,WhiteFideId,BlackFideId,EventDate
0,PRO League Group Stage,chess.com INT,2018.01.18,1,"Puranik, Abhimanyu","Gupta, Abhijeet",0-1,GM,GM,2520,2610,C07,French,"Tarrasch, Eliskases variation",Mumbai Movers,Delhi Dynamite,5061245,5010608,2018.01.18
1,PRO League Group Stage,chess.com INT,2018.01.18,1,"Puranik, Abhimanyu","Aradhya, Garg",1-0,GM,,2520,2289,C72,Ruy Lopez,"modern Steinitz defence, 5.O-O",Mumbai Movers,Delhi Dynamite,5061245,25006746,2018.01.18
2,PRO League Group Stage,chess.com INT,2018.01.18,1,"Gupta, Abhijeet","Kunte, Abhijit",1/2-1/2,GM,GM,2610,2499,A15,English opening,,Delhi Dynamite,Mumbai Movers,5010608,5002265,2018.01.18
3,PRO League Group Stage,chess.com INT,2018.01.18,1,"Gupta, Abhijeet","Ghosh, Diptayan",1-0,GM,GM,2610,2556,A05,Reti,King's Indian attack,Delhi Dynamite,Mumbai Movers,5010608,5045207,2018.01.18
4,PRO League Group Stage,chess.com INT,2018.01.18,1,"Kunte, Abhijit","Lalith, Babu M R",1/2-1/2,GM,GM,2499,2542,A14,English,Neo-Catalan declined,Mumbai Movers,Delhi Dynamite,5002265,5024595,2018.01.18


In [61]:
games.groupby('Result').size()

Result
0-1        785
1-0        973
1/2-1/2    499
dtype: int64

In [106]:
def g(x):
    """Convert one game row into the score obtained by each colour.

    x must support x['Result']. '1-0' gives white 1 / black 0, '0-1' gives
    white 0 / black 1, and any other value gives .5 to both sides.
    Returns a pd.Series indexed by 'white' and 'black'.
    """
    result = x['Result']
    if result == '1-0':
        scores = [1, 0]
    elif result == '0-1':
        scores = [0, 1]
    else:
        # Everything that is not a decisive result is scored as a draw.
        scores = [.5, .5]
    return pd.Series(scores, index=['white', 'black'])

# Apply g to every row of games: obs has one row per game with the columns
# 'white' and 'black' holding the score each side obtained.
obs = games.apply(g, axis=1)

In [107]:
obs.head()

Unnamed: 0,white,black
0,0.0,1.0
1,1.0,0.0
2,0.5,0.5
3,1.0,0.0
4,0.5,0.5


In [108]:
# Column totals of obs, i.e. the total score per colour over all games.
obs_sum = obs.sum()

In [109]:
print(games.loc[0])

Event                 PRO League Group Stage
Site                           chess.com INT
Date                              2018.01.18
Round                                      1
White                     Puranik, Abhimanyu
Black                        Gupta, Abhijeet
Result                                   0-1
WhiteTitle                                GM
BlackTitle                                GM
WhiteElo                                2520
BlackElo                                2610
ECO                                      C07
Opening                               French
Variation      Tarrasch, Eliskases variation
WhiteTeam                      Mumbai Movers
BlackTeam                     Delhi Dynamite
WhiteFideId                          5061245
BlackFideId                          5010608
EventDate                         2018.01.18
Name: 0, dtype: object


In [110]:
import scipy.optimize

In [111]:
def fun(x):
    """Sum of squared errors between the model's 'white' value and obs['white'].

    Parameters
    ----------
    x : sequence of two floats
        x[0] is passed to Elo as the white advantage and x[1] as the
        draw-odds advantage.

    Returns
    -------
    float
        Sum over all games of
        (elo.probabilities(WhiteElo, BlackElo)['white'] - obs['white']) ** 2.

    Reads the notebook-level ``games`` and ``obs`` frames.
    """
    white_advantage, draw_odds_advantage = x[0], x[1]
    elo = Elo(white_advantage, draw_odds_advantage)
    # Only the 'white' entry is used, so collect it in a single pass instead
    # of building a pd.Series per row through DataFrame.apply: the optimiser
    # calls this function once per evaluation (159 evaluations took about two
    # minutes in the recorded run).
    predicted_white = pd.Series(
        [elo.probabilities(int(white_elo), int(black_elo))['white']
         for white_elo, black_elo in zip(games['WhiteElo'], games['BlackElo'])],
        index=games.index)
    # NOTE(review): obs['white'] scores a draw as .5, whereas the 'white'
    # output of elo.probabilities looks like a win probability (the three
    # outputs sum to the number of games in later cells) — confirm that this
    # is the intended target before trusting the fitted parameters.
    return ((predicted_white - obs['white'])**2).sum()

In [112]:
# Evaluate the objective once at [.2, .6], the same point that is used as the
# optimiser's starting guess.
fun([.2, .6])

429.31214779958771

In [113]:
%%time
# Minimise fun over both parameters, starting from [.2, .6] with each
# parameter bounded to [0, 1].
# NOTE(review): the output recorded for this run reports success: False
# (ABNORMAL_TERMINATION_IN_LNSRCH), so res.x should not be read as a
# converged optimum.
res = scipy.optimize.minimize(fun, [.2, .6], bounds=[(0, 1), (0, 1)])

CPU times: user 2min 6s, sys: 624 ms, total: 2min 7s
Wall time: 2min 7s


In [114]:
res

      fun: 360.35635314676324
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([-0.5824063 , -2.30830892])
  message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
     nfev: 159
      nit: 5
   status: 2
  success: False
        x: array([ 0.51106858,  0.39786358])

In [95]:
def gun(x):
    """Residuals between predicted and observed outcome totals.

    Parameters
    ----------
    x : sequence of two floats
        x[0] is passed to Elo as the white advantage and x[1] as the
        draw-odds advantage.

    Returns
    -------
    pd.Series
        For each outcome label returned by elo.probabilities, the sum of the
        predicted values over all games minus the number of games in
        ``games`` that ended with that outcome.

    Reads the notebook-level ``games`` frame.
    """
    white_advantage, draw_odds_advantage = x[0], x[1]
    elo = Elo(white_advantage, draw_odds_advantage)
    # assumes elo.probabilities returns a mapping keyed by outcome label
    # ('white' / 'draw' / 'black'), as the recorded outputs suggest.
    elo_prob = pd.DataFrame(
        [elo.probabilities(int(white_elo), int(black_elo))
         for white_elo, black_elo in zip(games['WhiteElo'], games['BlackElo'])],
        index=games.index)
    predicted = elo_prob.sum()
    # Tally the observed outcomes straight from the Result column rather than
    # from the global obs_sum: with g scoring draws as .5 for both colours,
    # obs_sum has no 'draw' entry, which would turn the draw residual into NaN
    # and compare win totals against half-point totals.
    # Result strings other than these three are not counted.
    observed = (games['Result']
                .map({'1-0': 'white', '0-1': 'black', '1/2-1/2': 'draw'})
                .value_counts()
                .reindex(predicted.index, fill_value=0))
    return predicted - observed

In [96]:
# Alternative fit: least squares on the residual vector returned by gun, from
# the same starting point, with lower bounds [0, 0] and upper bounds [1, 1].
# NOTE(review): the result is only displayed, not assigned to a variable, and
# the recorded output shows a very large optimality value (~1.2e7) despite
# success: True — treat the reported x with caution.
scipy.optimize.least_squares(gun, [.2, .6], bounds=[[0, 0], [1, 1]])

 active_mask: array([0, 0])
        cost: 25185.09308870028
         fun: array([-175.04769935,   40.57898518,  134.46871417])
        grad: array([ 28395707.35384861,     71718.42016977])
         jac: array([[-91613.73718262,   -589.12627411],
       [  -423.39063263,   1178.25256348],
       [ 92037.1275177 ,   -589.12641907]])
     message: '`xtol` termination condition is satisfied.'
        nfev: 28
        njev: 12
  optimality: 11700669.766041616
      status: 3
     success: True
           x: array([ 0.4120577 ,  0.44094882])

In [97]:
games.groupby('Result').size()

Result
0-1        785
1-0        973
1/2-1/2    499
dtype: int64

In [115]:
# Parameters reported by scipy.optimize.minimize (res.x), kept for reference:
# elo = Elo(0.51106858, 0.39786358)
# NOTE(review): the values below do not match any optimiser output shown in
# this notebook; presumably they come from an earlier run — confirm their origin.
elo = Elo(0.37397392,  0.39493437)


In [116]:
# Sum the per-game outputs of elo.probabilities over every game, giving one
# total per outcome label for the parameters currently held in elo.
games.apply(lambda x: pd.Series(elo.probabilities(int(x['WhiteElo']), int(x['BlackElo']))), axis=1).sum()

black     643.515960
draw      499.639735
white    1113.844304
dtype: float64

In [83]:
# NOTE(review): the output recorded for this cell (In [83]) has a 'draw' row and
# integer counts, which the g defined in In [106] cannot produce (it only emits
# 'white' and 'black' scores) — re-run to refresh the output.
obs.sum() # - games.apply(lambda x: pd.Series(elo.probabilities(int(x['WhiteElo']), int(x['BlackElo']))), axis=1).sum()

black    785
draw     499
white    973
dtype: int64

In [117]:
# Predicted outcome totals for the parameters reported by
# scipy.optimize.minimize (res.x).
elo = Elo(0.51106858,  0.39786358)
games.apply(lambda x: pd.Series(elo.probabilities(int(x['WhiteElo']), int(x['BlackElo']))), axis=1).sum()

black     589.700004
draw      447.190432
white    1220.109565
dtype: float64

In [118]:
# Predicted outcome totals for another parameter pair.
# NOTE(review): these values do not match any optimiser output shown in this
# notebook — confirm where they came from.
elo = Elo(0.39266867, 0.46936407)
games.apply(lambda x: pd.Series(elo.probabilities(int(x['WhiteElo']), int(x['BlackElo']))), axis=1).sum()

black     606.567848
draw      580.809036
white    1069.623116
dtype: float64