In [62]:
import csv
from itertools import tee, izip
from collections import defaultdict
from pprint import pprint
from random import random

def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    next(b, None)
    return izip(a, b)

In [98]:
counts_won = defaultdict(int)
counts_lost = defaultdict(int)
current_counts = defaultdict(int)
serve_won, serve_total = 0, 0
with open('charting-m-points.csv') as infile:
    reader = csv.DictReader(infile)
    for point, next_point in pairwise(reader):
            
        # ignore tiebreak
        if point['TB?'] != '0':
            continue
            
        # count serves won
        serve_total += 1
        if point['isSvrWinner'] == '1':
            serve_won += 1
            
        # add to current_counts
        current_counts[point['Pts']] += 1
        
        # at end of game add to counts_won or counts_lost
        if point['GmW'] != '0':
            if point['isSvrWinner'] == '1':
                for key in current_counts:
                    counts_won[key] += current_counts[key]
            else:
                for key in current_counts:
                    counts_lost[key] += current_counts[key] 
            current_counts = defaultdict(int)

In [101]:
probs = {}
for key in counts_won:
    probs[key] = counts_won[key] / float(counts_lost[key] + counts_won[key])
probs

{'0-0': 0.7862308684195772,
 '0-15': 0.6156501726121979,
 '0-30': 0.3991306065994863,
 '0-40': 0.15857284440039643,
 '15-0': 0.8844420885237212,
 '15-15': 0.75731957481196,
 '15-30': 0.5452275564635115,
 '15-40': 0.26411853124328966,
 '30-0': 0.9531056328329431,
 '30-15': 0.8799841038548152,
 '30-30': 0.7293081299320976,
 '30-40': 0.4306023743221457,
 '40-0': 0.9888101837593676,
 '40-15': 0.9667666561414588,
 '40-30': 0.8997656390841896,
 '40-40': 0.7112881806108897,
 '40-AD': 0.42211673699015473,
 'AD-40': 0.886790439607341}

In [103]:
serve_won / float(serve_total)

0.6349880565793861

In [111]:
score_map = ['0', '15', '30', '40']
def score_to_str(score):
    p1, p2 = score
    if p1 > 3 or p2 > 3:
        subtract = min(p1, p2) - 2
        p1 -= subtract
        p2 -= subtract
    return score_map[p1] + '-' + score_map[p2]

def simulate_game(point_win_prob):
    record = []
    score = [0, 0]
    while (score[0] < 4 and score[1] < 4) or abs(score[0] - score[1]) < 2:
        record.append(tuple(score))
        if random() < point_win_prob:
            score[0] += 1
        else:
            score[1] += 1
    return record, score[0] > score[1]

def estimate_probabilities(point_win_prob, sims=int(1e6)):
    counts_won = defaultdict(int)
    counts_lost = defaultdict(int)
    for sim in range(sims):
        record, won = simulate_game(point_win_prob)
        add_to = counts_won if won else counts_lost
        for score in record:
            add_to[score_to_str(score)] += 1
    probs = {}
    for key in counts_won:
        probs[key] = counts_won[key] / float(counts_lost[key] + counts_won[key])
    return probs

In [112]:
probs2 = estimate_probabilities(0.6349)
probs2

{'0-0': 0.803921,
 '0-15': 0.6568827465174948,
 '0-30': 0.44563861289401296,
 '0-40': 0.19264284825096478,
 '15-0': 0.8883224201976957,
 '15-15': 0.7783363842022554,
 '15-30': 0.5889833007632027,
 '15-40': 0.30358593572287396,
 '30-0': 0.9513686244395452,
 '30-15': 0.8877787171925753,
 '30-30': 0.752183282715887,
 '30-40': 0.4783670820848522,
 '40-0': 0.9878036409604145,
 '40-15': 0.9667453083109919,
 '40-30': 0.9090285581553026,
 '40-40': 0.7508042058767045}

In [113]:
for key in probs2:
    print key, probs2[key] - probs[key]

0-15 0.0412325739053
0-0 0.0176901315804
30-30 0.0228751527838
15-0 0.00388033167397
40-30 0.00926291907111
40-15 -2.13478304669e-05
40-0 -0.00100654279895
40-40 0.0395160252658
30-15 0.00779461333776
15-15 0.0210168093903
15-40 0.0394674044796
30-0 -0.0017370083934
30-40 0.0477647077627
0-30 0.0465080062945
0-40 0.0340700038506
15-30 0.0437557442997
