### Surface model example

This example shows how to fit the model taking margin of victory and surface into account.

The module `jax_elo.models.correlated_skills_model` provides a set of convenience functions which we will use for this.

In [None]:
import os; os.environ['CUDA_VISIBLE_DEVICES'] = ''
from jax_elo.models.correlated_skills_model import fit, calculate_ratings, predict, get_player_skill_history
from jax_elo.utils.data import get_data

In [None]:
# Change this to where your data is located, or set the TENNIS_ATP_DIR
# environment variable to point at it without editing the notebook.
# This uses Jeff Sackmann's dataset, which is available here:
# https://github.com/JeffSackmann/tennis_atp
data_dir = os.environ.get('TENNIS_ATP_DIR', '/home/martin/data/tennis_atp/')
data = get_data(data_dir)

In [None]:
# Keep matches from 2010 onwards that were not played on carpet, and
# require the service-points-won figures for both players to be present.
is_recent = data['tourney_date'].dt.year >= 2010
not_carpet = data['surface'] != 'Carpet'
to_use = data[is_recent & not_carpet].dropna(subset=['spw_winner', 'spw_loser'])

In [None]:
# Margin of victory: the winner's % of service points won minus the loser's
margins = to_use['spw_winner'].sub(to_use['spw_loser'])

In [None]:
# We can ignore the matches from 2010 and 2011 using the objective mask.
# It is switched off for the time being, which means we'll use all data;
# flip the flag below to enable it.
use_objective_mask = False

if use_objective_mask:
    objective_mask = (to_use['tourney_date'].dt.year >= 2012).values.astype(float)
else:
    objective_mask = None

In [None]:
# Fit the model -- this might take a few minutes (but not more than that).
# Returns the fitted parameters together with information from the optimiser.
params, opt_info = fit(
    to_use['winner_name'],
    to_use['loser_name'],
    to_use['surface'].values,
    margins=margins.values,
    verbose=True,
    objective_mask=objective_mask,
)

In [None]:
# The parameters found by `fit` are:
params

In [None]:
# With the fitted parameters we can now calculate the rating history,
# using the same match data that was passed to `fit`:
history, final_rating_dict, mark_names = calculate_ratings(
    params,
    to_use['winner_name'],
    to_use['loser_name'],
    to_use['surface'].values,
    margins.values,
)

In [None]:
# Here's an example entry (the last one in the history):
history[-1]

In [None]:
# We can predict matches on a particular surface
# (here: Roger Federer against Tennys Sandgren on 'Hard'), using the final ratings
predict(final_rating_dict, params, 'Roger Federer', 'Tennys Sandgren', 'Hard', mark_names)

In [None]:
# We can plot the ratings over time:
import matplotlib.pyplot as plt
%matplotlib inline

f, ax = plt.subplots(len(mark_names), 1)

start_year = 2012

for cur_player in ['Roger Federer', 'Rafael Nadal', 'Novak Djokovic', 'Andy Murray']:

    cur_history = get_player_skill_history(history, final_rating_dict, to_use['tourney_date'].values, cur_player)
    cur_history = cur_history[cur_history.index >= f'{start_year}-01-01']
    
    for cur_surface, cur_ax in zip(mark_names, ax.reshape(-1)):
        cur_ax.step(cur_history.index, cur_history[cur_surface].values, label=cur_player)
        cur_ax.legend(ncol=4, loc='upper center')
        cur_ax.set_title(cur_surface)
        cur_ax.set_ylim(1700, 2700)

f.set_size_inches(8, 9)
f.tight_layout()


In [None]:
# The marginal standard deviation on each surface is the square root of the
# corresponding diagonal entry of the fitted covariance matrix.
import numpy as np
import pandas as pd

marginal_var = np.diag(params.theta['cov_mat'])
prior_sd = pd.Series(np.sqrt(marginal_var), index=mark_names)
print(prior_sd)

In [None]:
# Finally, we can look at the correlation matrix between surface results
def covar_to_corr(covar_mat):
    """Turn a covariance matrix into a correlation matrix.

    Each entry (i, j) is divided by the product of the marginal standard
    deviations of i and j, i.e. the square roots of the diagonal entries.

    Args:
        covar_mat: Square array-like covariance matrix.

    Returns:
        A numpy array of the same shape holding the correlations.
    """
    covar_mat = np.asarray(covar_mat)

    inv_marg_sd = 1. / np.sqrt(np.diag(covar_mat))

    # Scale rows and columns by broadcasting rather than multiplying by dense
    # diagonal matrices: this avoids the two matrix products and keeps a
    # single non-finite entry from spreading to the rest of its row/column.
    return covar_mat * inv_marg_sd[:, None] * inv_marg_sd[None, :]

# Correlation matrix derived from the fitted covariance matrix
corr_mat = covar_to_corr(params.theta['cov_mat'])

In [None]:
# Show the correlations as a labelled table, rounded to three decimals
pd.DataFrame(corr_mat, index=mark_names, columns=mark_names).round(3)

In [None]:
# Mean log of the 'winner_prior_prob' entries across the whole history
# (presumably the pre-match probability given to the eventual winner -- confirm),
# for comparison with the Elo log likelihood computed further down.
win_probs = np.stack([x['winner_prior_prob'] for x in history])
np.mean(np.log(win_probs))

In [None]:
# Compare with Elo:
from jax_elo.utils.elo import optimise_static_k, compute_elo_ratings

In [None]:
# Find the best constant k-factor for standard Elo on the same matches.
# The second return value is not needed; it is bound to a named throwaway
# rather than `_`, because assigning `_` at the top level of a notebook
# shadows IPython's "last output" variable.
k, _elo_opt_result = optimise_static_k(to_use['winner_name'].values, to_use['loser_name'].values)

In [None]:
# Run standard Elo over the same matches; the k-factor function ignores its
# argument and always returns the optimised constant k.
elo_results = pd.DataFrame(
    compute_elo_ratings(
        to_use['winner_name'].values,
        to_use['loser_name'].values,
        lambda _: k,
    )
)

In [None]:
# Standard Elo has a lower log likelihood
# (mean log of the probability it assigned to the actual winner):
np.mean(np.log(elo_results['winner_prob']))

In [None]:
# Accuracy of standard Elo: the fraction of matches in which
# 'winner_prob' was above 0.5
np.mean(elo_results['winner_prob'] > 0.5)