In [1]:
import pandas as pd
import numpy as np
import requests
import json
import os
from functools import reduce
from datetime import datetime
from dotenv import load_dotenv
%load_ext dotenv
%dotenv

from config import API_PATH, tour_lst, config

# API key used for the Data Golf requests below; read from the process
# environment (presumably populated from a .env file by the %dotenv magic above).
# Evaluates to None when the variable is not set.
MY_API_KEY = os.environ.get('MY_API_KEY')

In [2]:
def get_draft_group_id(contest_obj, contest_type_id=84):
    """Return the smallest draft group id matching a contest type.

    Parameters
    ----------
    contest_obj : object
        Object exposing a ``draft_groups`` iterable whose items have
        ``contest_type_id`` and ``draft_group_id`` attributes.
    contest_type_id : int, default 84
        Contest type to filter on.
        NOTE(review): the meaning of 84 is not visible in this notebook - confirm.

    Returns
    -------
    The minimum ``draft_group_id`` among the matching draft groups.

    Raises
    ------
    ValueError
        If no draft group has the requested ``contest_type_id``.
    """
    matching_ids = [
        group.draft_group_id
        for group in contest_obj.draft_groups
        if group.contest_type_id == contest_type_id
    ]
    if not matching_ids:
        # min([]) would raise the same exception type with an opaque message;
        # say which contest type was missing instead.
        raise ValueError(
            f'No draft group found for contest_type_id={contest_type_id}'
        )
    return min(matching_ids)

#draft_group_id = get_draft_group_id(contests)

In [3]:
def get_draftables(draft_group_id):
    """Return player names and salaries for one draft group.

    Calls ``Client().draftables(draft_group_id=...)`` and reads ``.players``;
    each player must expose ``name_details.first``, ``name_details.last`` and
    ``salary``.

    NOTE(review): neither ``Client`` nor ``tqdm`` is imported before this cell
    in the notebook (``tqdm`` is only imported in a later cell), so both must
    be in scope before this function is called.

    Parameters
    ----------
    draft_group_id
        Identifier forwarded unchanged to ``Client().draftables``.

    Returns
    -------
    pandas.DataFrame
        Columns ``player_name`` (formatted ``"Last, First"``) and
        ``showdown_salary``, one row per player with a 0..n-1 index.
        An empty player list yields an empty frame with the same columns.
    """
    player_lst = Client().draftables(draft_group_id=draft_group_id).players

    # Collect plain records and build the frame once, instead of creating a
    # one-row DataFrame per player and concatenating them all at the end.
    records = []
    for player in tqdm(player_lst):
        name_details = player.name_details
        records.append({
            'player_name': f'{name_details.last}, {name_details.first}',
            'showdown_salary': player.salary,
        })

    # Passing the columns explicitly keeps the schema stable for an empty slate.
    return pd.DataFrame(records, columns=['player_name', 'showdown_salary'])

# Data-golf
## Current Rounds

In [4]:
class DataGolfExtractor:
    """Pull several Data Golf feeds for one tour and merge them per player.

    Every ``get_*`` method takes a ``config_key`` and reads
    ``self.config[config_key]``, which must provide:

    * ``'path'``   - endpoint path appended to ``api_path``
    * ``'fields'`` - list of columns to keep from the resulting DataFrame
    """

    def __init__(self,
                 tour,
                 config=config,
                 api_path=API_PATH,
                 api_key=MY_API_KEY,
                 odds_format='percent',
                 timeout=30,
                 ):
        """Store the request settings; no network call is made here.

        Parameters
        ----------
        tour : str
            Value sent as the ``tour`` query parameter.
        config : dict
            Mapping of config key -> ``{'path': ..., 'fields': [...]}``.
        api_path : str
            Base URL that each configured ``path`` is appended to.
        api_key : str
            Value sent as the ``key`` query parameter.
        odds_format : str, default 'percent'
            Value sent as ``odds_format`` by ``get_pre_tourney_preds``.
        timeout : float, default 30
            Seconds passed to ``requests.get`` so a stalled connection cannot
            hang the notebook indefinitely.
        """
        self.tour = tour
        self.config = config
        self.api_path = api_path
        self.api_key = api_key
        self.odds_format = odds_format
        self.timeout = timeout

    def _get_json(self, config_key, **params):
        """GET the endpoint configured under ``config_key`` and return its JSON.

        ``params`` are sent as query parameters together with the API key.
        Raises ``requests.HTTPError`` on a non-2xx response.
        """
        path = self.config[config_key]['path']
        response = requests.get(
            f'{self.api_path}{path}',
            params={**params, 'key': self.api_key},
            timeout=self.timeout,
        )
        # Fail here rather than later with a confusing KeyError on an error payload.
        response.raise_for_status()
        return response.json()

    def _select_fields(self, df, config_key):
        """Return ``df`` restricted to the ``'fields'`` configured for ``config_key``."""
        # Use the instance config so a config passed to __init__ is honoured.
        return df[self.config[config_key]['fields']]

    def get_field_updates(self, config_key):
        """Fetch the field feed for ``self.tour``.

        The payload's ``'field'`` entry becomes the rows; every other top-level
        key of the payload is added as a column holding that key's value.

        Returns
        -------
        pandas.DataFrame limited to the configured fields.
        """
        data = self._get_json(config_key, tour=self.tour)
        df = pd.DataFrame(data['field'])
        for col, value in data.items():
            if col != 'field':
                df[col] = value

        return self._select_fields(df, config_key)

    def get_rankings(self, config_key):
        """Fetch the rankings feed (sent without a ``tour`` parameter).

        Returns
        -------
        pandas.DataFrame built from the payload's ``'rankings'`` entry,
        limited to the configured fields.
        """
        data = self._get_json(config_key)
        df = pd.DataFrame(data['rankings'])

        return self._select_fields(df, config_key)

    def get_pre_tourney_preds(self, config_key):
        """Fetch pre-tournament predictions and combine two models per player.

        For each model named in the payload's ``'models_available'`` the
        prediction columns are suffixed with ``_<model>``. The result is the
        ``'baseline'`` model left-joined on ``dg_id`` with the columns of
        ``'baseline_history_fit'`` whose name contains ``'history'``.

        Returns
        -------
        pandas.DataFrame limited to the configured fields.

        Raises
        ------
        KeyError
            If either ``'baseline'`` or ``'baseline_history_fit'`` is not among
            the models in the payload.
        """
        data = self._get_json(
            config_key, tour=self.tour, odds_format=self.odds_format
        )

        pred_cols = ['make_cut', 'top_10', 'top_20', 'top_5', 'win']
        model_dict = {
            model: pd.DataFrame(data[model]).rename(
                columns={col: f'{col}_{model}' for col in pred_cols}
            )
            for model in data['models_available']
        }

        history_fit = model_dict['baseline_history_fit']
        # After the rename above, the history-fit prediction columns all carry
        # 'history' in their name; dg_id is kept as the join key.
        cols_to_use = [col for col in history_fit.columns if 'history' in col] + ['dg_id']

        df = model_dict['baseline'].merge(
            history_fit[cols_to_use],
            how='left',
            on='dg_id',
        )

        return self._select_fields(df, config_key)

    def get_player_skill_decomps(self, config_key):
        """Fetch the skill-decomposition feed for ``self.tour``.

        Returns
        -------
        pandas.DataFrame built from the payload's ``'players'`` entry,
        limited to the configured fields.
        """
        data = self._get_json(config_key, tour=self.tour)
        df = pd.DataFrame(data['players'])

        return self._select_fields(df, config_key)

    def pull_and_merge_dfs(self, include_pre_tourney_preds=False):
        """Fetch the feeds and left-join them on ``dg_id``.

        The field-updates frame is the left-most table, so the result has its
        rows; rankings and skill decompositions are joined onto it in that
        order.

        Parameters
        ----------
        include_pre_tourney_preds : bool, default False
            When True, pre-tournament predictions are also fetched and joined
            (between rankings and skill decompositions). The default skips
            that request entirely, since its result was previously fetched
            but never merged.
            NOTE(review): joining it may produce overlapping column names
            depending on the configured fields - confirm before enabling.

        Returns
        -------
        pandas.DataFrame
        """
        dfs = [
            self.get_field_updates('field_updates'),
            self.get_rankings('rankings'),
        ]
        if include_pre_tourney_preds:
            dfs.append(self.get_pre_tourney_preds('pre_tourney_preds'))
        dfs.append(self.get_player_skill_decomps('skill_decomps'))

        merged_dfs = reduce(
            lambda left, right: pd.merge(left, right, on='dg_id', how='left'),
            dfs,
        )

        return merged_dfs

In [5]:
# Pull and merge the current feeds for the 'pga' tour into one DataFrame.
dg_extractor = DataGolfExtractor(tour='pga')
dg_df = dg_extractor.pull_and_merge_dfs()

In [6]:
# Write the merged table to a CSV in the working directory, stamped with
# today's local date (YYYY_MM_DD); the DataFrame index is not written.
now = datetime.now().strftime('%Y_%m_%d')
snapshot_path = f'pre_tourney_snapshot_{now}.csv'
dg_df.to_csv(snapshot_path, index=False)

In [8]:
# player_name_map = {
#     'Lee, Kyoung-Hoon':'Lee, Kyounghoon',
#     'Fitzpatrick, Matt':'Fitzpatrick, Matthew'
# }

# draftable_df['player_name'] = (np.where(draftable_df['player_name'].isin(player_name_map.keys()),
#                                         draftable_df['player_name'].map(player_name_map),
#                                         draftable_df['player_name'])
#                               )

## Historical Data
# Request the historical raw-data event list. The query string is built via
# `params` so it starts with '?' (the hand-written URL had '&key=' glued
# directly onto the path, which is not a valid query string).
requests.get(
    'https://feeds.datagolf.com/historical-raw-data/event-list',
    params={'key': MY_API_KEY},
    timeout=30,
)

In [12]:
#def get_historical_event_ids()

# Fetch the list of events with historical raw data and load it as a table.
response = requests.get(
    'https://feeds.datagolf.com/historical-raw-data/event-list',
    params={'key': MY_API_KEY},
    timeout=30,
)
# Stop here on an HTTP error instead of building a DataFrame from an error payload.
response.raise_for_status()
event_data = pd.DataFrame(response.json())

In [26]:
# Keep only the rows whose tour is 'pga'. reset_index() renumbers the rows
# from 0 and keeps the previous index as an 'index' column.
pga_tour_events = (
    event_data
    .query("tour =='pga'")
    .reset_index()
)

In [23]:
# Preview the first rows of the filtered events table. The previous cell text
# (`.shape[]`) was a syntax error; the recorded output is a five-row table,
# which is what head() displays.
pga_tour_events.head()

Unnamed: 0,calendar_year,date,event_id,event_name,sg_categories,tour
1,2021,2021-08-15,13,Wyndham Championship,yes,pga
9,2021,2021-08-08,472,Barracuda Championship,no,pga
11,2021,2021-08-08,476,World Golf Championships-FedEx St. Jude Invita...,yes,pga
20,2021,2021-08-01,519,Olympic Men's Golf Competition,yes,pga
26,2021,2021-07-25,525,3M Open,yes,pga


In [30]:
# Preview the shape of a historical-rounds request URL.
# The example values match the URL recorded in this cell's output; they are
# assigned here so the cell does not depend on leftover kernel state.
tour, event_id, year = 'pga', 13, 2021
# The key is shown as a placeholder so the secret never lands in saved output;
# real requests should send MY_API_KEY.
f'https://feeds.datagolf.com/historical-raw-data/rounds?tour={tour}&event_id={event_id}&year={year}&key=<MY_API_KEY>'

'https://feeds.datagolf.com/historical-raw-data/rounds?tour=pga&event_id=13&year=2021&key=<MY_API_KEY>'

In [75]:
# Show the first entry of the 'scores' column of `df` (one player's
# per-round record, as the output below illustrates).
df['scores'][0]

{'dg_id': 17550,
 'fin_text': '1',
 'player_name': 'van Rooyen, Erik',
 'round_1': {'course_name': 'Tahoe Mt. Club (Old Greenwood)',
  'course_num': 890,
  'score': 68,
  'sg_total': 2.826},
 'round_2': {'course_name': 'Tahoe Mt. Club (Old Greenwood)',
  'course_num': 890,
  'score': 64,
  'sg_total': 5.229},
 'round_3': {'course_name': 'Tahoe Mt. Club (Old Greenwood)',
  'course_num': 890,
  'score': 67,
  'sg_total': 2.5},
 'round_4': {'course_name': 'Tahoe Mt. Club (Old Greenwood)',
  'course_num': 890,
  'score': 64,
  'sg_total': 5.343}}

In [118]:
from tqdm.notebook import tqdm

# Number of events (taken from the top of pga_tour_events) to download.
# Set to len(pga_tour_events) to pull every listed event.
N_EVENTS = 5
rounds = [f'round_{round_n}' for round_n in range(1, 5)]

# One dict per (player, round); the DataFrame is built once after the loop
# instead of being re-concatenated after every event.
round_lst = []
for event_pos in tqdm(range(min(N_EVENTS, len(pga_tour_events)))):
    event = pga_tour_events.iloc[event_pos]
    event_id = event['event_id']
    year = event['calendar_year']
    response = requests.get(
        'https://feeds.datagolf.com/historical-raw-data/rounds',
        # The tour comes from the event row itself, and the key from the
        # environment rather than a literal embedded in the URL.
        params={
            'tour': event['tour'],
            'event_id': event_id,
            'year': year,
            'key': MY_API_KEY,
        },
        timeout=30,
    )
    response.raise_for_status()
    df = pd.DataFrame(response.json())

    # Each row of `df` carries the event's year / event_id plus one player's
    # 'scores' dict. Distinct names are used so the outer loop's
    # `year` / `event_id` are not overwritten.
    for resp_year, resp_event_id, score_row in zip(df['year'], df['event_id'], df['scores']):
        player_fields = {
            'dg_id': score_row['dg_id'],
            'fin': score_row['fin_text'],
            'player_name': score_row['player_name'],
            'year': resp_year,
            'event_id': resp_event_id,
        }
        for r in rounds:
            round_data = score_row.get(r)
            # A round key is absent when the player has no data for that round.
            if round_data is not None:
                round_lst.append({**round_data, **player_fields, 'round': r})

# Columns are sorted alphabetically, as concat(sort=True) did; rows now get a
# unique 0..n-1 index instead of every row being labelled 0.
final_df = pd.DataFrame(round_lst).sort_index(axis=1)
        
#         score_df = pd.DataFrame(df['scores'][idx])
#         score_dfs.append(score_df)
        
#     full_score_df = pd.concat(score_dfs, sort=True)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [151]:
# Summarise each player's rounds per event and course: sum, mean, min and
# standard deviation of the score and strokes-gained columns.
group_cols = ['course_name','course_num','event_id','fin','dg_id','player_name', 'year']
agg_cols = ['score','sg_app','sg_arg','sg_ott','sg_putt','sg_t2g','sg_total']
agg_funcs = ['sum','mean','min','std']

grouped_df = (
    final_df
    .groupby(group_cols)[agg_cols]
    .agg(agg_funcs)
    .reset_index()
)

In [152]:
# Flatten the two-level (column, aggregation) header into single names such as
# 'score_mean'; group keys, whose second level is '', keep their plain name.
# The MultiIndex guard makes the cell safe to re-run: once the names are flat
# strings, indexing col[0] / col[1] would pick single characters and mangle them.
if isinstance(grouped_df.columns, pd.MultiIndex):
    grouped_df.columns = [
        top if sub == '' else top + '_' + sub
        for top, sub in grouped_df.columns
    ]

In [153]:
# Display the column names of grouped_df (flat '<column>_<aggregation>' names
# for the aggregated columns, as listed in the output below).
grouped_df.columns

Index(['course_name', 'course_num', 'event_id', 'fin', 'dg_id', 'player_name',
       'year', 'score_sum', 'score_mean', 'score_min', 'score_std',
       'sg_app_sum', 'sg_app_mean', 'sg_app_min', 'sg_app_std', 'sg_arg_sum',
       'sg_arg_mean', 'sg_arg_min', 'sg_arg_std', 'sg_ott_sum', 'sg_ott_mean',
       'sg_ott_min', 'sg_ott_std', 'sg_putt_sum', 'sg_putt_mean',
       'sg_putt_min', 'sg_putt_std', 'sg_t2g_sum', 'sg_t2g_mean', 'sg_t2g_min',
       'sg_t2g_std', 'sg_total_sum', 'sg_total_mean', 'sg_total_min',
       'sg_total_std'],
      dtype='object')

In [68]:
# Historical Betting Odds
# Fetch the list of events that have historical odds for `tour`.
# NOTE(review): `tour` is not assigned in this cell and must already exist in
# the kernel; the URL recorded earlier in the notebook suggests 'pga' - confirm.
response = requests.get(
    'https://feeds.datagolf.com/historical-odds/event-list',
    params={'tour': tour, 'key': MY_API_KEY},
    timeout=30,
)
# Stop on an HTTP error rather than parsing an error payload as data.
response.raise_for_status()
json_data = response.json()

In [70]:
# Show the events that have odds data. sort_values() requires a `by` argument
# (calling it bare raises TypeError); ordering by event and then year matches
# the row order of the recorded output below.
pd.DataFrame(json_data).sort_values(['event_id', 'calendar_year'])

Unnamed: 0,archived_preds,calendar_year,event_id,event_name,matchups,outrights
0,no,2019,2,Desert Classic,yes,no
1,yes,2020,2,The American Express,yes,yes
2,yes,2021,2,The American Express,yes,yes
3,no,2019,3,Waste Management Phoenix Open,yes,yes
4,yes,2020,3,Waste Management Phoenix Open,yes,yes
...,...,...,...,...,...,...
109,yes,2020,533,Workday Charity Open,yes,yes
110,yes,2021,534,Corales Puntacana Resort & Club Championship #2,yes,yes
111,yes,2021,535,U.S. Open #2,yes,yes
112,yes,2021,536,The Masters #2,yes,yes


In [91]:
# Fetch one book's outright odds for a single event and market.
book = 'bet365'
event_id = 536
# Event 536 is listed with calendar_year 2021 in the event table above; set it
# explicitly instead of relying on whatever `year` an earlier cell left behind.
year = 2021
market = "win"
# NOTE(review): `tour` is not assigned in this cell and must already exist in
# the kernel - confirm its value.
response = requests.get(
    'https://feeds.datagolf.com/historical-odds/outrights',
    params={
        'tour': tour,
        'event_id': event_id,
        'year': year,
        'market': market,
        'book': book,
        'key': MY_API_KEY,
    },
    timeout=30,
)
# Stop on an HTTP error rather than failing later on a missing 'odds' key.
response.raise_for_status()
json_data = response.json()

In [95]:
# Tabulate the per-player odds records, listing paid-out bets
# (highest bet_outcome_numeric) first.
(
    pd.DataFrame(json_data['odds'])
    .sort_values(by='bet_outcome_numeric', ascending=False)
)

Unnamed: 0,bet_outcome_numeric,bet_outcome_text,close_odds,close_time,dg_id,open_odds,open_time,outcome,player_name
20,1,paid in full,46.0,2021-04-08 06:22,13562,46.0,2021-04-05 06:20,1,"Matsuyama, Hideki"
54,0,loss,161.0,2021-04-08 06:22,23323,151.0,2021-04-05 06:20,T12,"Macintyre, Robert"
62,0,loss,226.0,2021-04-08 06:22,6986,226.0,2021-04-05 06:20,CUT,"Johnson, Zach"
61,0,loss,201.0,2021-04-08 06:22,15330,201.0,2021-04-05 06:20,CUT,"Griffin, Lanto"
60,0,loss,201.0,2021-04-08 06:22,23542,201.0,2021-04-05 06:20,T26,"Champ, Cameron"
...,...,...,...,...,...,...,...,...,...
27,0,loss,67.0,2021-04-08 06:22,7672,67.0,2021-04-05 06:20,T26,"Oosthuizen, Louis"
26,0,loss,56.0,2021-04-08 06:22,18079,61.0,2021-04-05 06:20,T40,"Niemann, Joaquin"
25,0,loss,51.0,2021-04-08 06:22,5689,51.0,2021-04-05 06:20,CUT,"Garcia, Sergio"
24,0,loss,51.0,2021-04-08 06:22,12294,51.0,2021-04-05 06:20,T46,"Fleetwood, Tommy"


In [96]:
## Pre-tournament predictions archive
# Uses `event_id` and `year` as currently set in the kernel.
# NOTE(review): the heading says "archive" but the path requested is
# 'preds/pre-tournament'; Data Golf may expose archived predictions under a
# separate endpoint - confirm against the API documentation.
response = requests.get(
    'https://feeds.datagolf.com/preds/pre-tournament',
    # The key is read from the environment rather than embedded in the URL.
    params={'event_id': event_id, 'year': year, 'key': MY_API_KEY},
    timeout=30,
)
# Stop on an HTTP error rather than displaying an error payload as predictions.
response.raise_for_status()
json_data = response.json()

In [97]:
# Display the decoded JSON payload held in json_data. This prints the whole
# structure (a dict with a 'baseline' list of per-player records, per the
# output below), which can be long.
json_data

{'baseline': [{'am': 0,
   'country': 'ESP',
   'dg_id': 19195,
   'make_cut': 0.867775,
   'player_name': 'Rahm, Jon',
   'sample_size': 150,
   'top_10': 0.418575,
   'top_20': 0.58935,
   'top_5': 0.28205,
   'win': 0.092075},
  {'am': 0,
   'country': 'USA',
   'dg_id': 22085,
   'make_cut': 0.796175,
   'player_name': 'Morikawa, Collin',
   'sample_size': 150,
   'top_10': 0.277475,
   'top_20': 0.441575,
   'top_5': 0.1665,
   'win': 0.0438},
  {'am': 0,
   'country': 'USA',
   'dg_id': 14636,
   'make_cut': 0.78025,
   'player_name': 'Spieth, Jordan',
   'sample_size': 150,
   'top_10': 0.25205,
   'top_20': 0.408525,
   'top_5': 0.14695,
   'win': 0.037375},
  {'am': 0,
   'country': 'USA',
   'dg_id': 19841,
   'make_cut': 0.7666,
   'player_name': 'DeChambeau, Bryson',
   'sample_size': 150,
   'top_10': 0.2263,
   'top_20': 0.377825,
   'top_5': 0.1308,
   'win': 0.031925},
  {'am': 0,
   'country': 'USA',
   'dg_id': 19895,
   'make_cut': 0.772925,
   'player_name': 'Schauf