We determine the probability of winning a chess game as a function of the rating difference between the two players. We find that the probability for either the white or the black player to win is well described by a logistic function. The probability of a draw decreases as the rating difference between the players grows.

In the second part of the notebook we investigate the advantage a player gets from starting the game (playing white) as a function of their rating.

# Load data, libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the games table; the first CSV column is used as the index
all_data = pd.read_csv(
    '../input/online-chess-games/lichess-08-2014.csv',
    index_col=0,
)

# Rebuild the individual ratings from the average and the difference:
# white sits half a difference above the average, black half below
half_difference = all_data['Rating Difference'] / 2
all_data['Rating White'] = all_data['Average Rating'] + half_difference
all_data['Rating Black'] = all_data['Average Rating'] - half_difference
all_data.head()

In [None]:
# Show the distinct values found in the 'Mode' column
game_modes = all_data['Mode'].unique()
print('Types of games:')
print(game_modes)

In [None]:
# Keep only the rows whose 'Mode' is 'Classical'
is_classical = all_data['Mode'] == 'Classical'
data = all_data[is_classical]

# Investigate probability to win/draw/lose as function of rating difference between the players

In [None]:
# Histogram of the rating differences, with the counts on a log scale
rating_differences = data['Rating Difference']
plt.hist(rating_differences, bins=30, log=True)
plt.title('Rating difference distribution for classical games')
plt.xlabel('Rating difference')
plt.show()

## Find win/draw ratios

In [None]:
# Equal-width [low, high) rating-difference bins spanning -800 to 800
bin_width = 50
rating_difference_bins = [
    [low, low + bin_width] for low in np.arange(-800, 800, bin_width)
]
n_bins = len(rating_difference_bins)

# Fraction of games in each bin that ended with each outcome
white_win_ratio = np.zeros(n_bins)
black_win_ratio = np.zeros(n_bins)
draw_ratio = np.zeros(n_bins)

# Matching uncertainties; sqrt(ratio / n) equals sqrt(count) / n,
# i.e. a Poisson error on the outcome count
err_white_win_ratio = np.zeros(n_bins)
err_black_win_ratio = np.zeros(n_bins)
err_draw_ratio = np.zeros(n_bins)

# Map every value of the 'Result' column to the arrays it fills
outcome_arrays = {
    'White Wins': (white_win_ratio, err_white_win_ratio),
    'Black Wins': (black_win_ratio, err_black_win_ratio),
    'Draw': (draw_ratio, err_draw_ratio),
}

# Handle the bins one at a time
rating_diff = data['Rating Difference']
for bin_idx, (low, high) in enumerate(rating_difference_bins):
    games_in_bin = data[(rating_diff >= low) & (rating_diff < high)]
    num_games = len(games_in_bin)
    results = games_in_bin['Result']

    for outcome, (ratio, ratio_err) in outcome_arrays.items():
        ratio[bin_idx] = np.sum(results == outcome) / num_games
        ratio_err[bin_idx] = np.sqrt(ratio[bin_idx] / num_games)

## Plot

In [None]:
# Centre of each rating-difference bin, used as the x coordinate
rating_difference_midpoint = np.array(
    [(low + high) / 2. for low, high in rating_difference_bins]
)

# (values, uncertainties, colour, legend label) for each outcome curve
outcome_curves = [
    (white_win_ratio, err_white_win_ratio, 'orange', 'White win'),
    (black_win_ratio, err_black_win_ratio, 'k', 'Black win'),
    (draw_ratio, err_draw_ratio, 'g', 'Draw'),
]
for values, errors, colour, legend_label in outcome_curves:
    plt.errorbar(
        rating_difference_midpoint,
        values,
        yerr=errors,
        color=colour,
        label=legend_label,
    )

# Dashed guide at equal ratings
plt.axvline(0, color='lightgray', ls='--')

plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('Probability')
plt.ylim([0, 1])
plt.legend()
plt.show()

## Fit white win ratio

In [None]:
def logistic_function(rating_difference, c0, c1):
    """
    Evaluate 1 / (1 + exp(c0 + c1 * rating_difference)).

    c0 acts as the bias and c1 as the slope of the linear term inside the
    exponential. Because that term sits in the denominator, the curve rises
    with rating_difference when c1 is negative and falls when it is positive.
    Scalars and numpy arrays are both accepted for rating_difference.
    """
    linear_term = c0 + c1*rating_difference
    denominator = 1. + np.exp(linear_term)
    return 1./denominator

In [None]:
from scipy.optimize import curve_fit

In [None]:
# Fit the logistic curve to the white win fractions, weighting each bin
# by its uncertainty
white_win_fit = curve_fit(
    logistic_function,
    rating_difference_midpoint,
    white_win_ratio,
    p0=(0, 0.1),
    sigma=err_white_win_ratio,
)
white_bias, white_slope = white_win_fit[0]
print(f'Best fit parameters (bias, slope): {white_win_fit[0]}')

# Fitted curve evaluated at every bin midpoint
white_win_best_fit_values = logistic_function(
    rating_difference_midpoint, white_bias, white_slope
)

In [None]:
# Measured white win fractions with their uncertainties
plt.errorbar(
    rating_difference_midpoint,
    white_win_ratio,
    yerr=err_white_win_ratio,
    color='orange',
    label='Data',
)

# Fitted logistic curve on top of the data
fit_line_style = dict(color='r', ls='--', label='Best fit (logistic)')
plt.plot(rating_difference_midpoint, white_win_best_fit_values, **fit_line_style)

# Dashed guide at equal ratings
plt.axvline(0, color='lightgray', ls='--')

plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('Probability of white win')
plt.ylim([0, 1])
plt.legend()
plt.show()

In [None]:
# Observed white win fraction minus the fitted curve, per bin
white_win_residuals = white_win_ratio - white_win_best_fit_values
plt.errorbar(
    rating_difference_midpoint,
    white_win_residuals,
    yerr=err_white_win_ratio,
    color='orange',
    label='Data',
)

# Dashed guides through zero, horizontal first and then vertical
for draw_guide in (plt.axhline, plt.axvline):
    draw_guide(0, color='lightgray', ls='--')

plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('White Win Best Fit Residuals')
plt.ylim([-0.05, 0.05])
plt.legend()
plt.show()

We get a decent logistic fit, although there are hints that white has an extra advantage in games with a rating difference between 50 and 100. Zooming in:

In [None]:
# Same data and fitted curve as the full-range plot; only the axis
# limits set below differ
plt.errorbar(
    rating_difference_midpoint,
    white_win_ratio,
    yerr=err_white_win_ratio,
    color='orange',
    label='Data',
)
zoom_fit_style = dict(color='r', ls='--', label='Best fit')
plt.plot(rating_difference_midpoint, white_win_best_fit_values, **zoom_fit_style)

# Dashed guide at equal ratings
plt.axvline(0, color='lightgray', ls='--')

plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('Probability of white win')

# Restrict the view to small rating differences
plt.ylim([0.2, 0.8])
plt.xlim([-150, 150])
plt.legend()
plt.show()

## Fit black win ratio

In [None]:
# Fit the logistic curve to the black win fractions, weighting each bin
# by its uncertainty
black_win_fit = curve_fit(
    logistic_function,
    rating_difference_midpoint,
    black_win_ratio,
    p0=(0, 0.1),
    sigma=err_black_win_ratio,
)
black_bias, black_slope = black_win_fit[0]
print(f'Best fit parameters (bias, slope): {black_win_fit[0]}')

# Fitted curve evaluated at every bin midpoint
black_win_best_fit_values = logistic_function(
    rating_difference_midpoint, black_bias, black_slope
)

In [None]:
# Measured black win fractions with their uncertainties
plt.errorbar(
    rating_difference_midpoint,
    black_win_ratio,
    yerr=err_black_win_ratio,
    color='k',
    label='Data',
)

# Fitted logistic curve on top of the data
black_fit_style = dict(color='r', ls='--', label='Best fit (logistic)')
plt.plot(rating_difference_midpoint, black_win_best_fit_values, **black_fit_style)

# Dashed guide at equal ratings
plt.axvline(0, color='lightgray', ls='--')

plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('Probability of black win')
plt.ylim([0, 1])
plt.legend()
plt.show()

Interestingly, black has a surprisingly high chance of winning games in which they are a massive underdog, presumably because of a white blunder.

In [None]:
# Residuals of the black win fit: observed fraction minus fitted curve,
# drawn with the per-bin uncertainties of the observed fraction
plt.errorbar(
        rating_difference_midpoint, 
        black_win_ratio - black_win_best_fit_values, 
        yerr = err_black_win_ratio,
        color = 'k',
        label = 'Data'
)

# Dashed guides through zero residual and zero rating difference
plt.axhline(0, color = 'lightgray', ls = '--')
plt.axvline(0, color = 'lightgray', ls = '--')

# The x axis is the same rating difference as in every other plot, where a
# positive value means white is the higher-rated player; the label used to
# claim the opposite
plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('Black Win Best Fit Residuals')
plt.ylim([-0.05, 0.05])
plt.legend()
plt.show()

We again see the extra win probability for rating differences between 50 and 100.

## Fit draw ratio

In [None]:
def draw_function(rating_difference, amplitude, std):
    """
    Evaluate a Gaussian bump centred on zero rating difference.

    Returns amplitude * exp(-rating_difference**2 / (2 * std**2)), so
    amplitude is the value at zero and std sets the width. Scalars and
    numpy arrays are both accepted for rating_difference.
    """
    squared_difference = rating_difference**2
    exponent = -squared_difference/2./std**2
    return amplitude*np.exp(exponent)

In [None]:
# Drop the first bin and the last two bins from the fit; per the original
# note this avoids a bin without draws
fit_window = slice(1, -2)

draw_fit = curve_fit(
    draw_function,
    rating_difference_midpoint[fit_window],
    draw_ratio[fit_window],
    p0=(0.04, 100),
    sigma=err_draw_ratio[fit_window],
)
print(f'Best fit parameters (amplitude, std): {draw_fit[0]}')

# Fitted curve evaluated on the same subset of midpoints
draw_amplitude, draw_std = draw_fit[0]
draw_best_fit_values = draw_function(
    rating_difference_midpoint[fit_window], draw_amplitude, draw_std
)

In [None]:
# Measured draw fractions over the full range of bins
plt.errorbar(
    rating_difference_midpoint,
    draw_ratio,
    yerr=err_draw_ratio,
    color='g',
    label='Draw',
)

# The fitted curve only exists for the bins that entered the fit
fitted_midpoints = rating_difference_midpoint[1:-2]
plt.plot(
    fitted_midpoints,
    draw_best_fit_values,
    color='r',
    ls='--',
    label='Best fit (normal)',
)

# Dashed guide at equal ratings
plt.axvline(0, color='lightgray', ls='--')

plt.xlabel('Player rating difference (positive = white advantage)')
plt.ylabel('Probability of draw')
plt.legend()
plt.show()

# Investigate starting advantage

In [None]:
# Evaluate both fitted logistic curves at a rating difference of zero
prob_white_win_at_zero_rating_diff = logistic_function(0, *white_win_fit[0])
prob_black_win_at_zero_rating_diff = logistic_function(0, *black_win_fit[0])

# White's edge is the gap between the two win probabilities
white_advantage_at_zero_rating_diff = (
    prob_white_win_at_zero_rating_diff - prob_black_win_at_zero_rating_diff
)
print(f'At zero rating difference, white has {100*white_advantage_at_zero_rating_diff:.1f}% advantage.')

In [None]:
# Histogram of the white player's rating, with the counts on a log scale
white_ratings = data['Rating White']
plt.hist(white_ratings, bins=30, log=True)
plt.title('Rating of the white player for classical games')
plt.xlabel('Rating of white player')
plt.show()

In [None]:
#Probe a reasonable range of white ratings: [low, high) bins, 100 points wide
bin_width = 100
white_rating_bins = [[x, x+bin_width] for x in np.arange(800, 2300, bin_width)]
n_bins_adv = len(white_rating_bins)

#Per-bin (white wins - black wins) / games
starting_advantage = np.zeros(n_bins_adv)

#Process bins one at a time
white_rating = data['Rating White']
for bin_idx, (low, high) in enumerate(white_rating_bins):
    games_in_bin = data[(white_rating >= low) & (white_rating < high)]
    num_games = len(games_in_bin)

    #An empty bin has no defined win rate; mark it NaN explicitly instead
    #of relying on a 0/0 division
    if num_games == 0:
        starting_advantage[bin_idx] = np.nan
        continue

    #Statistics (draws are not needed for the win-rate difference)
    num_white_wins = np.sum(games_in_bin['Result'] == 'White Wins')
    num_black_wins = np.sum(games_in_bin['Result'] == 'Black Wins')

    #Difference between the white and black win rates in this bin
    starting_advantage[bin_idx] = (num_white_wins - num_black_wins)/num_games

In [None]:
# Centre of each white-rating bin, used as the x coordinate
white_rating_midpoint = np.array([
                                (x[0] + x[1])/2. 
                                for 
                                x in white_rating_bins
                        ])

# Win-rate difference per bin; no yerr is passed, so no error bars are drawn
plt.errorbar(
        white_rating_midpoint, 
        starting_advantage, 
        color = 'k'
)

plt.xlabel('White player rating')
plt.ylabel('White win rate - black win rate')
# No legend call: the single curve carries no label, so plt.legend() had
# nothing to show and only produced a warning
plt.show()

It is interesting to see such a strong relationship. We had better check that it is actually in the data and not a bug:

In [None]:
# Summarise the results of games whose white rating lies in [800, 900)
white_rating = data['Rating White']
low_rated_white_games = data[(white_rating >= 800) & (white_rating < 900)]
low_rated_white_games['Result'].describe()

In [None]:
# Summarise the results of games whose white rating is 2000 or higher
high_rated_white_games = data[data['Rating White'] >= 2000]
high_rated_white_games['Result'].describe()

On second thought, it makes sense: the higher our rating, the more likely we are to be matched with weaker opposition, due to the underlying distribution of player ratings (and vice versa).

## Correct for rating differences

Let's only look at games where the players are of similar skill (rating difference less than 50).

In [None]:
# Use the two central rating-difference bins as the reference: the bin at
# n_bins//2 starts at zero difference, the bin before it ends at zero
white_better_bin = n_bins // 2
black_better_bin = white_better_bin - 1

prob_white_win_if_white_better = white_win_ratio[white_better_bin]
prob_white_win_if_black_better = white_win_ratio[black_better_bin]
prob_black_win_if_white_better = black_win_ratio[white_better_bin]
prob_black_win_if_black_better = black_win_ratio[black_better_bin]

# Print the four reference probabilities, one per line
for reference_probability in (
    prob_white_win_if_white_better,
    prob_white_win_if_black_better,
    prob_black_win_if_white_better,
    prob_black_win_if_black_better,
):
    print(reference_probability)

In [None]:
#Probe a reasonable range of white ratings: [low, high) bins, 100 points wide
bin_width = 100
white_rating_bins = [[x, x+bin_width] for x in np.arange(900, 2300, bin_width)]
n_bins_adv = len(white_rating_bins)

#Per-bin (actual - expected) win difference per game
excess_starting_advantage = np.zeros(n_bins_adv)

#Only games between closely rated players are considered
is_balanced = abs(data['Rating Difference']) < 50

#Process bins one at a time
for bin_idx, (low, high) in enumerate(white_rating_bins):
    games_in_bin = data[
                    (data['Rating White'] >= low)
                    &
                    (data['Rating White'] <  high)
                    &
                    is_balanced
                   ]
    num_games = len(games_in_bin)

    #An empty bin has no defined excess; mark it NaN explicitly instead of
    #relying on a 0/0 division
    if num_games == 0:
        excess_starting_advantage[bin_idx] = np.nan
        continue

    #Games with a rating difference of exactly zero count as "white better":
    #the reference probabilities for that group come from the bin starting at
    #zero, which includes them. With a strict "> 0" they entered the actual
    #win counts and num_games but contributed nothing to the expectation.
    num_games_where_white_better = np.sum(games_in_bin['Rating Difference'] >= 0)
    num_games_where_black_better = np.sum(games_in_bin['Rating Difference'] < 0)

    #Statistics
    num_white_wins = np.sum(games_in_bin['Result'] == 'White Wins')
    num_black_wins = np.sum(games_in_bin['Result'] == 'Black Wins')

    #Expected wins, from the reference probabilities of the two groups
    num_expected_white_wins = (
              num_games_where_white_better * prob_white_win_if_white_better
                +
              num_games_where_black_better * prob_white_win_if_black_better
        )
    num_expected_black_wins = (
              num_games_where_white_better * prob_black_win_if_white_better
                +
              num_games_where_black_better * prob_black_win_if_black_better
        )

    expected_win_difference = num_expected_white_wins - num_expected_black_wins
    actual_win_difference   = num_white_wins          - num_black_wins

    #Excess advantage
    excess_starting_advantage[bin_idx] = (actual_win_difference - expected_win_difference)/num_games

In [None]:
# Centre of each white-rating bin, used as the x coordinate
white_rating_midpoint = np.array(
    [(low + high) / 2. for low, high in white_rating_bins]
)

# Excess advantage per bin; no yerr is passed, so no error bars are drawn
plt.errorbar(white_rating_midpoint, excess_starting_advantage, color='k')

plt.xlabel('White player rating')
plt.ylabel('White win rate on top of a simple expectation')
plt.show()

In [None]:
# Display the games behind the last data point: white rated in
# [2200, 2300) and an absolute rating difference below 50
white_rating = data['Rating White']
in_top_bin = (white_rating >= 2200) & (white_rating < 2300)
closely_matched = abs(data['Rating Difference']) < 50
data[in_top_bin & closely_matched]

Interestingly, at high ratings, balanced games tend to go to the black player! But the sample size is quite small. Also, we see that two of the black wins were due to time forfeit.

In [None]:
# Centre of each white-rating bin, used as the x coordinate
white_rating_midpoint = np.array(
    [(low + high) / 2. for low, high in white_rating_bins]
)

# Same curve as the previous excess-advantage plot
plt.errorbar(white_rating_midpoint, excess_starting_advantage, color='k')

plt.xlabel('White player rating')
plt.ylabel('White player win rate on top of a simple expectation')

# Narrow y range plus a dashed guide at zero excess
plt.ylim([-0.1, 0.1])
plt.axhline(0, color='lightgray', ls='--')
plt.show()