## Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from  matplotlib.colors import LinearSegmentedColormap
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
def fetch_strokes_gained():
    """
    Scrape the expected number of strokes to complete the hole from golfity.com/strokes-gained-calculator
    for every surface and every whole-number yardage the loop below covers.

    A headless Chrome session selects each surface in the calculator's dropdown, types each yardage into
    the distance box, and reads the benchmark text shown next to it.

    Returns a DataFrame with columns 'surface', 'yardage' and 'xstrokes' (float).
    """

    # Absolute XPaths into the calculator form; they will break if the page layout changes
    form_xpath = '/html/body/div[2]/div/div/div[1]/div/form/div[1]'
    select_xpath = form_xpath + '/select'
    input_xpath = form_xpath + '/input'
    benchmark_xpath = form_xpath + '/div'

    # Launch Chrome browser
    # NOTE(review): newer Selenium 4 releases expect the driver path via a Service object - confirm
    # against the installed Selenium version
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)

    # Initialize empty list to store lists of surface, yardage, and benchmark text
    xstrokes = []
    try:
        wait = WebDriverWait(driver, 5)
        driver.get('https://www.golfity.com/strokes-gained-calculator')

        for surface in ['Tee', 'Fairway', 'Rough', 'Sand', 'Green']:

            # Click new surface
            dropdown = Select(driver.find_element(by=By.XPATH, value=select_xpath))
            dropdown.select_by_visible_text(surface)

            # The max yardage for "Green" is 119, for all other surfaces the max yardage is 600
            max_yardage = 119 if surface == "Green" else 600

            # Fetch the expected number of strokes for every whole number distance from 1 to max
            for yardage in range(1, max_yardage + 1):

                # Enter the yardage (the wait hands back the element once it is visible)
                input_element = wait.until(ec.visibility_of_element_located((By.XPATH, input_xpath)))
                input_element.clear()
                input_element.send_keys(f"{yardage}")

                # Store the benchmark text
                # NOTE(review): the text is read immediately after typing, with no wait for the page
                # to refresh it - confirm the page updates synchronously
                benchmark = driver.find_element(by=By.XPATH, value=benchmark_xpath).text
                xstrokes.append([surface, yardage, benchmark])
    finally:
        # Always shut the browser down, even if the scrape fails part-way through
        driver.quit()

    # Pull the number out of the benchmark text (the token after ': ', up to the next space),
    # turn it into a float and return dataframe
    xstrokes_df = pd.DataFrame(xstrokes, columns=['surface', 'yardage', 'benchmark'])
    xstrokes_df['xstrokes'] = xstrokes_df['benchmark'].str.split(': ').str[1].str.split(' ').str[0]
    xstrokes_df['xstrokes'] = xstrokes_df['xstrokes'].astype('float')
    xstrokes_df.drop('benchmark', axis=1, inplace=True)

    return xstrokes_df

In [None]:
# Load the saved expected-strokes table from a local CSV file
xstrokes_csv_path = "C:/Users/JToffler/Desktop/xstrokes.csv"
xstrokes = pd.read_csv(xstrokes_csv_path)

In [None]:
def simulate_stroke_probs(n_trials, xstrokes):
    """
    Using a normal distribution, model likelihood of n strokes to complete the hole given a distance

    For every row of `xstrokes`, `n_trials` samples are drawn from a normal distribution with
    mean=xstrokes and stdev=SQRT(xstrokes)/xstrokes, rounded to whole strokes, and turned into
    relative frequencies.

    n_trials: number of random samples drawn per row
    xstrokes: DataFrame with an 'xstrokes' column (expected strokes to finish the hole)

    Returns the same DataFrame object, modified in place, with new columns 'prob_1' ... 'prob_7'.
    'prob_1' to 'prob_6' are the shares of samples that rounded to exactly that score; 'prob_7' is
    the share that rounded to 7 or more. Samples that round below 1 are not counted in any column.
    Uses the global NumPy random state, so call np.random.seed beforehand for reproducible output.
    """

    # Scores below the top bucket get a column each; the top bucket collects everything at or above it
    top_bucket = 7
    bucket_probs = {bucket: [] for bucket in range(1, top_bucket + 1)}

    # Iterate through each row, model the number of strokes given its expected strokes
    for mean_strokes in xstrokes['xstrokes']:
        spread = np.sqrt(mean_strokes) / mean_strokes

        # Draw all samples for this row in one call and round them to whole strokes
        scores = np.rint(np.random.normal(loc=mean_strokes, scale=spread, size=n_trials))

        # Turn the frequency of each score into a decimal
        for bucket in range(1, top_bucket):
            bucket_probs[bucket].append(np.count_nonzero(scores == bucket) / n_trials)
        bucket_probs[top_bucket].append(np.count_nonzero(scores >= top_bucket) / n_trials)

    # Store as new columns
    for bucket, probs in bucket_probs.items():
        xstrokes[f'prob_{bucket}'] = probs

    return xstrokes

In [None]:
def calculate_outcome_probs(xstrokes, strokes_hero, dist_hero, strokes_villain, dist_villain):
    """
    Calculate the probability of winning the hole, tying the hole, and losing the hole given the current state of a hole

    xstrokes: DataFrame with a 'distance' column and 'prob_<n>' columns holding the probability of
        needing n more strokes from that distance
    strokes_hero / strokes_villain: strokes each player has already taken on the hole
    dist_hero / dist_villain: each player's current distance, matched exactly against 'distance'

    Returns (win_prob_hero, tie_prob, win_prob_villain); the lower total score wins the hole.
    The two players' remaining strokes are treated as independent. If several rows share a
    distance, the first one is used.

    Raises IndexError when a requested distance has no row in `xstrokes`.

    NOTE(review): fetch_strokes_gained names its column 'yardage', not 'distance' - confirm the
    table passed in here really carries a 'distance' column.
    """

    # Get the highest number of additional strokes in the xstrokes document
    prob_cols = [col for col in xstrokes.columns if col.startswith('prob')]
    max_strokes = max(int(col.split('_')[1]) for col in prob_cols)

    def total_score_probs(distance, strokes_so_far):
        # Map each possible final score (strokes so far + n more) to its probability
        matches = xstrokes[xstrokes['distance'] == distance]
        if matches.empty:
            raise IndexError(f"no row in xstrokes with distance == {distance!r}")
        return {
            extra + strokes_so_far: matches[f"prob_{extra}"].values[0]
            for extra in range(1, max_strokes + 1)
        }

    scores_hero = total_score_probs(dist_hero, strokes_hero)
    scores_villain = total_score_probs(dist_villain, strokes_villain)

    # Initialize probabilities at 0
    win_prob_hero = 0
    tie_prob = 0
    win_prob_villain = 0

    # For every combination of hero score and villain score, add the joint probability to the result probability
    for score_hero, prob_hero in scores_hero.items():
        for score_villain, prob_villain in scores_villain.items():
            joint_prob = prob_hero * prob_villain
            if score_hero < score_villain:
                win_prob_hero += joint_prob
            elif score_hero > score_villain:
                win_prob_villain += joint_prob
            else:
                tie_prob += joint_prob

    return win_prob_hero, tie_prob, win_prob_villain

In [None]:
# tall_df: one row per (p1_dist, p2_dist) pair, for distances 100 to 200 in steps of 5,
# with both players having already taken 1 stroke
win_prob_list = []
for p1_dist in range(100, 205, 5):
    for p2_dist in range(100, 205, 5):
        # calculate_outcome_probs is the function in this notebook with this signature;
        # the old name win_probs is not defined in the visible cells
        p_win, p_tie, p_lose = calculate_outcome_probs(xstrokes, 1, p1_dist, 1, p2_dist)

        win_prob_list.append([p1_dist, p2_dist, p_win, p_tie, p_lose])

win_prob_df = pd.DataFrame(win_prob_list, columns=['p1_dist', 'p2_dist', 'p_win', 'p_tie', 'p_lose'])

# Win probability minus loss probability for player 1
win_prob_df['x_points'] = win_prob_df['p_win'] - win_prob_df['p_lose']

In [None]:
# Distance of each hole in the match: 15 holes, each 400
hole_dist_list = [400]*15

# For each hole (numbered from 1), the win / tie / lose probabilities when both players
# start from the hole's full distance with 0 strokes taken
hole_dict = dict()
for hole_number, hole_dist in enumerate(hole_dist_list, start=1):
    # One call per hole returns all three probabilities; calculate_outcome_probs is the
    # function in this notebook with this signature (win_probs is not defined in the visible cells)
    hole_win, hole_tie, hole_lose = calculate_outcome_probs(xstrokes, 0, hole_dist, 0, hole_dist)

    hole_dict[hole_number] = {'win': hole_win, 'tie': hole_tie, 'lose': hole_lose}

In [None]:
# One empty inner dict per hole, keyed by hole number starting at 1
match_xpoints_dict = {hole: dict() for hole in range(1, len(hole_dist_list) + 1)}

# Get the expected points on hole 15
# NOTE(review): win, tie and loss are defined outside this part of the notebook - presumably
# the points awarded for each match result; the inner keys look like the match lead going
# into the hole - confirm both
final_hole_probs = hole_dict[15]
final_hole_xpoints = match_xpoints_dict[15]

final_hole_xpoints[1] = (
    win*final_hole_probs['win'] + win*final_hole_probs['tie'] + tie*final_hole_probs['lose']
)
final_hole_xpoints[0] = (
    win*final_hole_probs['win'] + tie*final_hole_probs['tie'] + loss*final_hole_probs['lose']
)
final_hole_xpoints[-1] = (
    tie*final_hole_probs['win'] + loss*final_hole_probs['tie'] + loss*final_hole_probs['lose']
)

# For keys 2..15 and -2..-15 the hole-15 result is not used: the value is win or loss outright
for i in range(2, 16):
    final_hole_xpoints[i] = win
    final_hole_xpoints[-i] = loss