Here, we'll try to fit our very first model on TennisExplorer data

In [1]:
from tennis_new.fetch.tennis_explorer.combiner import read_joined

# Load the joined TennisExplorer dataset used throughout this notebook.
# NOTE(review): presumably a pandas DataFrame with one row per match — confirm in read_joined.
jd = read_joined()

  if (yield from self.run_code(code, result)):


In [2]:
# Dimensions of the loaded data as (rows, columns).
jd.shape

(952932, 31)

In [3]:
# Filters 
def missing_score_error(df):
    """Flag rows where either player's sets-won total is missing.

    Returns a boolean mask aligned with ``df`` that is True wherever
    ``p1_sets_won`` or ``p2_sets_won`` is null.
    """
    p1_missing = df['p1_sets_won'].isna()
    p2_missing = df['p2_sets_won'].isna()
    return p1_missing | p2_missing

def possible_walkover(df):
    """Flag rows that look like walkovers.

    A row is flagged when ``p1_sets_won`` equals 1 while ``p1_set1`` is
    null, i.e. one set is credited but no first-set score was recorded.
    Returns a boolean mask aligned with ``df``.
    """
    credited_one_set = df['p1_sets_won'].eq(1)
    no_first_set_score = df['p1_set1'].isna()
    return credited_one_set & no_first_set_score

def retirement(df):
    """Flag rows that look like retirements.

    A row is flagged when ``p1_sets_won`` equals 1 and a ``p1_set1``
    score is present. Returns a boolean mask aligned with ``df``.

    NOTE(review): presumably p1 is the match winner, so a single set won
    with a recorded score means the match ended early — confirm.
    """
    credited_one_set = df['p1_sets_won'].eq(1)
    has_first_set_score = df['p1_set1'].notna()
    return credited_one_set & has_first_set_score

def missing_pids(df):
    """Flag rows where either player's link is missing.

    Returns a boolean mask aligned with ``df`` that is True wherever
    ``p1_link`` or ``p2_link`` is null.
    """
    p1_unknown = df['p1_link'].isna()
    p2_unknown = df['p2_link'].isna()
    return p1_unknown | p2_unknown

#### Back to Fitting

In [4]:
# Drop rows that are missing either player's link or either sets-won total;
# `rel` is the table every model below is fit and evaluated on.
# Open question from the original author: should we fit only when we have the
# unique identifier for both players?
bad_filter = missing_pids(jd) | missing_score_error(jd)
rel = jd[~bad_filter]

In [5]:
from tennis_new.ml.elo import ELOModel

# Baseline ELO model; it is fit on every row of `rel` (no walkover/retirement filtering).
# NOTE(review): the effect of winner_mod=True is defined in ELOModel — confirm its meaning there.
unfiltered_elo = ELOModel(winner_mod=True)

In [6]:
%pdb
# Fit on the player-1 / player-2 links, keyed by match link. No `ys` is passed
# here, unlike the set-level fit later in this notebook.
# NOTE(review): presumably this populates `unfiltered_elo.history`, which eval_mod reads — confirm.
unfiltered_elo.fit_and_backfill(
    rel['p1_link'],
    rel['p2_link'],
    rel['match_link']
)

Automatic pdb calling has been turned ON


In [7]:
import pandas as pd

def get_test_set(df, test_min='2011-01-01', test_max='2015-01-01', test_surface=None, filter_walkovers=True):
    """Select the evaluation rows of ``df``.

    Args:
        df: Match table with a ``date`` column comparable to ``test_min`` and
            ``test_max``.
        test_min: Inclusive lower bound on ``date``.
        test_max: Exclusive upper bound on ``date``.
        test_surface: When given, keep only rows whose ``surface`` equals this
            value. ``None`` (the default) keeps every surface.
        filter_walkovers: When True, drop rows flagged by ``possible_walkover``.

    Returns:
        The rows of ``df`` that satisfy every active condition.

    Raises:
        KeyError: If ``test_surface`` is given but ``df`` has no ``surface``
            column.
    """
    cond = (df['date'] >= test_min) & (df['date'] < test_max)
    if test_surface is not None:
        # Previously no surface condition was defined on this path, so any
        # non-None test_surface raised UnboundLocalError.
        # NOTE(review): assumes the surface column is named 'surface' — confirm against the data.
        cond = cond & (df['surface'] == test_surface)
    if filter_walkovers:
        cond = cond & ~possible_walkover(df)
    return df[cond]

def eval_mod(mod, df, test_min='2011-01-01', test_max='2015-01-01', test_surface=None, filter_walkovers=False):
    """Score a fitted model's backfilled predictions on the test window.

    The test rows are chosen with ``get_test_set`` and inner-joined to
    ``mod.history`` on ``match_link`` == ``match_id``. The number of joined
    rows is printed.

    Args:
        mod: Fitted model exposing ``history``, convertible to a DataFrame
            with ``match_id`` and ``elo_match_prediction`` columns.
        df: Match table containing ``match_link``, ``p1_odds``, ``p2_odds``
            and the columns ``get_test_set`` needs.
        test_min, test_max, test_surface, filter_walkovers: Forwarded to
            ``get_test_set``. Note that ``filter_walkovers`` defaults to
            False here, so walkovers stay in the test set unless requested.

    Returns:
        dict with:
            ``overall_accuracy``: share of test rows with prediction > 0.5.
            ``odds_accuracy``: share of odds rows where ``p1_odds`` is lower
            than ``p2_odds``.
            ``model_odds_accuracy``: share of odds rows with prediction > 0.5.
            ``n_w_odds``: number of odds rows (both odds present and unequal).

    NOTE(review): treating "prediction > 0.5" as correct presumably relies on
    p1 always being the winner — confirm.
    """
    predictions = pd.DataFrame(mod.history)
    held_out = get_test_set(
        df,
        test_min=test_min,
        test_max=test_max,
        test_surface=test_surface,
        filter_walkovers=filter_walkovers
    )
    scored = pd.merge(held_out, predictions, left_on='match_link', right_on='match_id')
    print(scored.shape[0])

    # Only rows with two distinct, non-null odds can be compared to the bookmaker.
    has_both_odds = scored['p1_odds'].notnull() & scored['p2_odds'].notnull()
    odds_differ = scored['p1_odds'] != scored['p2_odds']
    priced = scored[has_both_odds & odds_differ]

    return {
        'overall_accuracy': (scored['elo_match_prediction'] > 0.5).mean(),
        'odds_accuracy': (priced['p1_odds'] < priced['p2_odds']).mean(),
        'model_odds_accuracy': (priced['elo_match_prediction'] > 0.5).mean(),
        'n_w_odds': priced.shape[0]
    }

In [8]:
# Evaluate the baseline model on eval_mod's default test window
# (2011-01-01 inclusive to 2015-01-01 exclusive); kept for the later comparison.
unfiltered_eval = eval_mod(unfiltered_elo, rel)
unfiltered_eval

224662


{'overall_accuracy': 0.7275996830794705,
 'odds_accuracy': 0.7200428690759507,
 'model_odds_accuracy': 0.7074658387051017,
 'n_w_odds': 63449}

#### Try Initial Set Model

Note that for this model we will definitely have to tune the ELO parameters.

In [9]:
from tennis_new.ml.elo import ELOModel

# Second model, built with the same constructor arguments as the baseline;
# it differs only in being fit on per-match set counts.
unfiltered_set_elo = ELOModel(winner_mod=True)

In [10]:
# Same inputs as the baseline fit, plus `ys`: an (n_matches, 2) array holding
# [p1_sets_won, p2_sets_won] for each match.
# NOTE(review): how fit_and_backfill uses `ys` is defined in ELOModel — confirm.
unfiltered_set_elo.fit_and_backfill(
    rel['p1_link'],
    rel['p2_link'],
    rel['match_link'],
    ys=rel[['p1_sets_won', 'p2_sets_won']].values
)

In [11]:
# Evaluate the set-level model on the same default test window as the baseline.
eval_mod(unfiltered_set_elo, rel)

224662


{'overall_accuracy': 0.7358520800135314,
 'odds_accuracy': 0.7200428690759507,
 'model_odds_accuracy': 0.7080332235338619,
 'n_w_odds': 63449}

The set model is working pretty well!  We should tune the ELO parameters again

###### Now Try Not Fitting on Matches with Retirement

It seems reasonable that we may do better if we exclude walkovers and retirements from training.

In [None]:
# Rows to exclude from fitting: possible walkovers or missing set totals.
# Rows with missing set totals were already dropped when `rel` was built, so
# the second term should be all-False here.
bad_filter = possible_walkover(rel) | missing_score_error(rel)
good_filter = ~bad_filter
# (number of excluded rows, number of kept rows)
bad_filter.sum(), good_filter.sum()

In [None]:
# Try not fitting on filtered out matches
# NOTE(review): presumably rows where filter_mask is False are skipped when
# updating ratings but still receive a backfilled prediction — confirm in ELOModel.
filtered_elo = ELOModel(winner_mod=True)
filtered_elo.fit_and_backfill(
    rel['p1_link'],
    rel['p2_link'],
    rel['match_link'],
    filter_mask=good_filter
)

In [None]:
# Evaluate the filtered-fit model on the same rows (`rel`) and default test
# window as the baseline, so the two results are directly comparable.
filtered_eval = eval_mod(filtered_elo, rel)
filtered_eval

We do marginally better when fitting only on the non-walkovers (at least on the subset of matches with odds). Let's run a hypothesis test to check whether the difference is significant.

In [None]:
# numpy is imported here because this cell uses `np` before the notebook's
# only other numpy import (in the final plotting cell).
import numpy as np
from scipy.stats import chi2_contingency

# Convert each model's accuracy on the matches with odds back into
# (correct, incorrect) counts.
ns1 = int(np.round(unfiltered_eval['model_odds_accuracy'] * unfiltered_eval['n_w_odds']))
nf1 = unfiltered_eval['n_w_odds'] - ns1

ns2 = int(np.round(filtered_eval['model_odds_accuracy'] * filtered_eval['n_w_odds']))
nf2 = filtered_eval['n_w_odds'] - ns2

# 2x2 contingency table: one row per model, columns are (correct, incorrect).
arr = np.array([[ns1, nf1], [ns2, nf2]])
# NOTE(review): both models appear to be scored on the same matches, so the two
# rows are not independent samples; a paired test (e.g. McNemar) may be more
# appropriate — confirm.
chi2, p, _, _ = chi2_contingency(arr)
p

Overall, the difference is not significant -- we also see that the accuracy decreases overall, so maybe these filters aren't great?  Let's try to filter out all retirements now...

In [None]:
# Stricter exclusion mask: possible walkovers, missing set totals, and now
# retirements as well. This rebinds bad_filter / good_filter.
bad_filter = possible_walkover(rel) | missing_score_error(rel) | retirement(rel)
good_filter = ~bad_filter
# (number of excluded rows, number of kept rows)
bad_filter.sum(), good_filter.sum()

In [None]:
# Try not fitting on filtered out matches
# This rebinds `filtered_elo`, replacing any model previously stored under
# that name, and uses whatever `good_filter` currently holds.
filtered_elo = ELOModel(winner_mod=True)
filtered_elo.fit_and_backfill(
    rel['p1_link'],
    rel['p2_link'],
    rel['match_link'],
    filter_mask=good_filter
)

In [None]:
# Evaluate the current `filtered_elo`; this overwrites any earlier `filtered_eval`.
filtered_eval = eval_mod(filtered_elo, rel)
filtered_eval

This is even worse!  For now, we won't do any filtering

#### Tune ELO Parameters

In [None]:
from tennis_new.ml.sobol import generate_sobol_seq, get_range_values

# Search bounds for the three ELOModel parameters c, o and s.
MIN_C = 100
MAX_C = 500
MIN_O = 0
MAX_O = 50
MIN_S = 0
MAX_S = 2


# NOTE(review): presumably 100 Sobol points in 3 dimensions; the meaning of the
# third argument (seed? skip?) is not visible here — confirm in generate_sobol_seq.
sobol_vals = generate_sobol_seq(3, 100, 1)
# One column of the sequence per parameter.
# NOTE(review): presumably get_range_values rescales each column into [MIN, MAX] — confirm.
# NOTE: the name `os` would shadow the standard-library `os` module if that is
# imported in this session.
cs = get_range_values(MIN_C, MAX_C, sobol_vals[:, 0])
os = get_range_values(MIN_O, MAX_O, sobol_vals[:, 1])
ss = get_range_values(MIN_S, MAX_S, sobol_vals[:, 2])

In [None]:
from tqdm.autonotebook import tqdm

# Fit and evaluate one ELOModel per sampled (c, o, s) triple, collecting the
# evaluation metrics together with the parameters that produced them.
out = []
# zip() has no length, so tqdm cannot infer a total for the progress display.
for c, o, s in tqdm(zip(cs, os, ss)):
    cur_elo = ELOModel(c=c, o=o, s=s, winner_mod=True)
    # No `ys` is passed, so this tunes the winner-based fit rather than the
    # set-level fit.
    cur_elo.fit_and_backfill(
        rel['p1_link'],
        rel['p2_link'],
        rel['match_link'],
    )
    # eval_mod prints the test-set size on every iteration.
    cur_eval = eval_mod(cur_elo, rel)
    cur_eval.update({'c': c, 'o': o, 's': s})
    out.append(cur_eval)

In [None]:
# One row per evaluated parameter triple: the eval_mod metrics plus c, o and s.
tune_df = pd.DataFrame(out)

In [None]:
# Rank the parameter sets in place by accuracy on the matches with odds,
# best first, then show the top rows.
tune_df.sort_values('model_odds_accuracy', ascending=False, inplace=True)
tune_df.head()

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np

def _plot_params(c, o, s):
    """Plot the curve ``c / (x + o) ** s`` for integer x from 0 to 99.

    The line is drawn with ``plt.plot`` and labelled with the three
    parameter values so it can be identified in a legend.
    """
    xs = np.arange(100)
    curve = c / (xs + o) ** s
    legend_label = 'c:%0.2f, o: %0.2f, s:%0.2f' % (c, o, s)
    plt.plot(xs, curve, label=legend_label)
        

def _plot_row(row):
    """Plot the curve for the ``c``, ``o`` and ``s`` entries of ``row``."""
    c, o, s = row['c'], row['o'], row['s']
    _plot_params(c, o, s)

# Plot the first five rows of tune_df (the best parameter sets, assuming
# tune_df is already sorted best-first). head(5) also copes with fewer than
# five rows, where indexing iloc[i] for i in range(5) would raise IndexError.
for _idx, _row in tune_df.head(5).iterrows():
    _plot_row(_row)

# Overlay the curve for ELOModel's default parameters for comparison.
_default_mod = ELOModel()
_plot_params(_default_mod.c, _default_mod.o, _default_mod.s)

plt.legend()

Above, it looks like the default parameters (those suggested by ESPN) are better than what we've found through tuning.