In [1]:
import pyximport; pyximport.install()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import set_types
import monte_carlo
from simulate import get_systems, run_trials
from tqdm import tqdm

%matplotlib inline

# Reproducing Tristan Barnett's Results

## Standard scoring systems

In [2]:
# Per-player parameters passed to run_trials (presumably each player's
# probability of winning a point on serve -- confirm against simulate).
spw_1 = 0.77
spw_2 = 0.73

# Scoring systems keyed by name; each value is handed to run_trials.
systems = get_systems()

print(systems.keys())

['wta_wimbledon', 'atp_us_open', 'fast_four_singles', 'fast_four_doubles', 'doubles', 'atp_wimbledon', 'mixed_doubles_fo', 'wta_us_open']


In [3]:
# Barnett's system numbering -> the corresponding key in `systems`.
system_nums = {
    4: 'atp_wimbledon',
    5: 'wta_wimbledon',
    6: 'atp_us_open',
    7: 'wta_us_open',
    8: 'mixed_doubles_fo',
    9: 'doubles',
}

In [4]:
def calculate_barnett_stats(sim_df):
    """Summarise one batch of simulated trials.

    Args:
        sim_df: DataFrame with a 'better_won' column and a 'total_points'
            column (one row per trial, presumably).

    Returns:
        dict with four entries:
            'p(A)'     -- mean of the 'better_won' column
            'mean_pts' -- mean of 'total_points'
            'std_pts'  -- standard deviation of 'total_points' (pandas
                          default, i.e. sample standard deviation)
            '98th_pts' -- 98th percentile of 'total_points'
    """
    total_points = sim_df['total_points']

    return {
        'p(A)': sim_df['better_won'].mean(),
        'mean_pts': total_points.mean(),
        'std_pts': total_points.std(),
        '98th_pts': np.percentile(total_points.values, 98),
    }

In [5]:
# Number of trials passed to run_trials for each scoring system.
num_trials = int(1e4)

# Accumulate the per-system summaries under their own name so that
# `simulation_results` is only ever bound to the finished DataFrame.
# (If the accumulator shared that name, interrupting this long-running loop
# would leave a half-filled dict behind for the cells that expect a DataFrame.)
stats_by_system = dict()

for system_num, key in tqdm(system_nums.items()):

    system_fn = systems[key]

    results = run_trials(system_fn, spw_1, spw_2, num_trials=num_trials)

    stats_by_system[system_num] = calculate_barnett_stats(results)

# One column per system number, one row per statistic.
simulation_results = pd.DataFrame(stats_by_system)

100%|██████████| 6/6 [01:24<00:00, 12.91s/it]


In [6]:
# Display the summary table: one column per system number, one row per statistic.
simulation_results

Unnamed: 0,4,5,6,7,8,9
98th_pts,581.02,475.0,385.0,243.02,185.0,173.0
mean_pts,289.4725,191.9478,272.1521,166.502,143.0818,131.1806
p(A),0.725,0.6909,0.7045,0.6638,0.6642,0.6553
std_pts,98.5548,93.426881,60.745111,40.537902,21.611946,20.378545


In [8]:
# Barnett's published results for spw = (0.77, 0.73).
#
# How Barnett's item labels map onto the row names used here:
#   (i)   -> 'p(A)'
#   (ii)  -> 'mean_pts'
#   (iii) -> 'std_pts'
#   (iv)  -> n/a (not used)
#   (v)   -> '98th_pts'

if (spw_1, spw_2) == (0.77, 0.73):

    stat_names = ('p(A)', 'mean_pts', 'std_pts', '98th_pts')

    # (system number, published values in the order given by stat_names)
    published = [
        (4, (0.723, 290.3, 99.5, 582)),
        (5, (0.690, 192.1, 93.1, 480)),
        (6, (0.708, 272.0, 60.7, 385)),
        (7, (0.669, 166.3, 40.3, 243)),
        (8, (0.656, 142.8, 21.8, 187)),
        (9, (0.658, 131.5, 20.5, 174)),
    ]

    b_77 = pd.DataFrame({num: dict(zip(stat_names, values))
                         for num, values in published})

    # Simulated minus published, aligned on statistic and system number.
    differences = simulation_results - b_77

else:

    # No published figures are recorded here for other parameter values.
    differences = None

differences

Unnamed: 0,4,5,6,7,8,9
98th_pts,-0.98,-5.0,0.0,0.02,-2.0,-1.0
mean_pts,-0.8275,-0.1522,0.1521,0.202,0.2818,-0.3194
p(A),0.002,0.0009,-0.0035,-0.0052,0.0082,-0.0027
std_pts,-0.9452,0.326881,0.045111,0.237902,-0.188054,-0.121455


In [10]:
# Percentage differences relative to Barnett's published values.
#
# `differences` is None when spw_1/spw_2 do not match the published scenario,
# and in that case b_77 is never created. The conditional expression only
# evaluates the division when a comparison exists, so this cell no longer
# raises for other parameter values (it simply displays nothing).
percent_differences = (None if differences is None
                       else (differences / b_77) * 100)

percent_differences

Unnamed: 0,4,5,6,7,8,9
98th_pts,-0.168385,-1.041667,0.0,0.00823,-1.069519,-0.574713
mean_pts,-0.28505,-0.07923,0.055919,0.121467,0.197339,-0.24289
p(A),0.276625,0.130435,-0.49435,-0.77728,1.25,-0.410334
std_pts,-0.94995,0.351108,0.074319,0.590328,-0.862632,-0.592462


## Conclusions

In the run shown above, every percentage difference is within about ±1.25%, and most are well under 1%, indicating fairly good agreement between the simulation and the paper.