In [6]:
import pandas as pd
import numpy as np
import requests
import json
import os
from tqdm.notebook import tqdm
from functools import reduce
from datetime import datetime
import warnings 
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
%load_ext dotenv
%dotenv

from config import API_PATH, tour_lst, config

MY_API_KEY = os.getenv('MY_API_KEY')

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [7]:
def get_draft_group_id(contest_obj, contest_type_id=84):
    draft_groups = contest_obj.draft_groups
    draft_group_id_lst = []
    for x in draft_groups:
        if x.contest_type_id == contest_type_id:
            draft_group_id_lst.append(x.draft_group_id)
            
    return min(draft_group_id_lst)

#draft_group_id = get_draft_group_id(contests)

In [8]:
def get_draftables(draft_group_id):
    df_lst = []
    player_lst = Client().draftables(draft_group_id=draft_group_id).players
    for player in tqdm(player_lst):
        name_details = player.name_details
        first = name_details.first
        last = name_details.last
        name = f'{last}, {first}'
        salary = player.salary

        df = pd.DataFrame({
            'player_name': name,
            'showdown_salary': salary
        }, index=[0])

        df_lst.append(df)

    return pd.concat(df_lst).reset_index(drop=True)

# Data-golf
## Current Rounds

In [9]:
class CurrentDataGolfExtractor:
    def __init__(self,
                 tour,
                 config=config,
                 api_path=API_PATH,
                 api_key=MY_API_KEY,
                 odds_format='percent',
                ):
        """
        """
        self.tour = tour
        self.config = config
        self.api_path = api_path
        self.api_key = api_key
        self.odds_format = odds_format
    
    def get_field_updates(self, config_key):
        """
        """
        path = self.config[config_key].get('path')
        response = requests.get(f'{self.api_path}{path}?tour={self.tour}&key={self.api_key}')
        data = response.json()
        df = pd.DataFrame(data['field'])
        for col in data.keys():
            if col != 'field':
                df[col] = data[col]
                
        return df[self.config[config_key]['fields']]
    
    def get_rankings(self, config_key):
        """
        """
        path = self.config[config_key].get('path')
        response = requests.get(f'{self.api_path}{path}?&key={self.api_key}')
        data = response.json()
        df = pd.DataFrame(data['rankings'])
        
        return df[config[config_key]['fields']]
    
    def get_pre_tourney_preds(self, config_key):
        """
        """
        path = self.config[config_key].get('path')
        response = requests.get(f'{self.api_path}{path}?tour={self.tour}&odds_format={self.odds_format}&key={self.api_key}')
        data = response.json()

        model_dict = {}
        for model in data['models_available']:
            df = pd.DataFrame(data[model])
            pred_cols = ['make_cut','top_10','top_20','top_5','win']
            df.rename(
                columns={x: x + f'_{model}' for x in pred_cols}, inplace=True
            )
            
            model_dict[model] = df
        
        cols_to_use = [col for col in model_dict['baseline_history_fit'].columns if 'history' in col] + ['dg_id']
        
        df = (model_dict['baseline']
              .merge(model_dict['baseline_history_fit'][cols_to_use],
                     how='left',
                     on='dg_id'
                    )
             )
    
        return df[config[config_key]['fields']]
    
    def get_player_skill_decomps(self, config_key):
        """
        """
        path = self.config[config_key].get('path')
        response = requests.get(f'{self.api_path}{path}?tour={self.tour}&key={self.api_key}')
        data = response.json()
        df = pd.DataFrame(data['players'])
        
        return df[config[config_key]['fields']]
    
    def pull_and_merge_dfs(self):
        """
        """
        field_updates = self.get_field_updates('field_updates')
        rankings = self.get_rankings('rankings')
        pre_tourney_preds = self.get_pre_tourney_preds('pre_tourney_preds')
        skill_decomps = self.get_player_skill_decomps('skill_decomps')
        
        dfs=[field_updates,
             rankings,
             #pre_tourney_preds,
             skill_decomps]
        merged_dfs = reduce(lambda left, right: pd.merge(left, right, on='dg_id', how='left'), dfs)
        
        return merged_dfs

In [10]:
dg_extractor = CurrentDataGolfExtractor('pga')
dg_df = dg_extractor.pull_and_merge_dfs()

KeyError: 'baseline_history_fit'

In [11]:
# now = datetime.now().strftime('%Y_%m_%d')
# dg_df.to_csv(f'pre_tourney_snapshot_{now}.csv', index=False)

In [12]:
# player_name_map = {
#     'Lee, Kyoung-Hoon':'Lee, Kyounghoon',
#     'Fitzpatrick, Matt':'Fitzpatrick, Matthew'
# }

# draftable_df['player_name'] = (np.where(draftable_df['player_name'].isin(player_name_map.keys()),
#                                         draftable_df['player_name'].map(player_name_map),
#                                         draftable_df['player_name'])
#                               )

## Historical Data

In [109]:
class HistoricalDataGolfExtractor:
    def __init__(self,
                 tour,
                 config=config,
                 api_path=API_PATH,
                 api_key=MY_API_KEY,
                 odds_format='percent',
                ):
            self.tour = tour
            self.config = config
            self.api_path = api_path
            self.api_key = api_key
            self.odds_format = odds_format    
    
    def get_historical_event_ids(self, config_key):
        path = self.config[config_key].get('path')
        response = requests.get(f'{self.api_path}{path}?&key={self.api_key}')
        df = (pd.DataFrame(response.json())
              .query(f"tour == '{self.tour}'")
              .reset_index(drop=True)
             )
        
        df = df[config[config_key]['fields']]
        return zip(df.calendar_year, df.event_id)
    
    def get_historical_round_data(self, config_key, events):
        path = self.config[config_key].get('path')
        event_cols = ['round','year','event_id','event_completed']
        round_cols = [f'round_{round_n}' for round_n in range(1,5)]
        round_lst = []
        for event in tqdm(list(events)):
            year, event_id = event[0], event[1]
            response = requests.get(f'https://feeds.datagolf.com/{path}?tour={self.tour}&event_id={event_id}&year={year}&key={self.api_key}')
            df = pd.DataFrame(response.json())
            event_completed = df['event_completed'][0]
            event_name = df['event_name'][0]
            for idx in range(df.shape[0]):
                player_scores = df['scores'][idx]
                player_info = pd.DataFrame({k: v for k, v in player_scores.items() if k not in rounds}, index=[0])

                for r in rounds:
                    round_data = player_scores.get(r)
                    if round_data is not None:
                        round_df = pd.DataFrame(round_data, index=[0])
                        round_df['round'] = r
                        round_df['year'] = year
                        round_df['event_id'] = event_id
                        round_df['event_completed'] = event_completed
                        round_df['event_name'] = event_name
                        
                        round_lst.append(pd.concat([player_info, round_df], axis = 1))
                        
        return pd.concat(round_lst).reset_index(drop=True)

In [110]:
#response = requests.get(f'https://feeds.datagolf.com/historical-raw-data/rounds?tour=pga&event_id=2&year=2017&key={MY_API_KEY}')

In [111]:
#pd.DataFrame(response.json())

In [112]:
#pd.DataFrame(response.json()).scores[33]

In [115]:
hist_dg_extractor = HistoricalDataGolfExtractor(tour='pga')
events = hist_dg_extractor.get_historical_event_ids('historical_event_lst')
round_data = hist_dg_extractor.get_historical_round_data('historical_round_data', events)

HBox(children=(FloatProgress(value=0.0, max=213.0), HTML(value='')))




In [116]:
round_data.query("(dg_id == 19195) & (year == 2017) & (event_id == 2)")

Unnamed: 0,course_name,course_num,dg_id,event_completed,event_id,event_name,fin_text,player_name,round,score,sg_app,sg_arg,sg_ott,sg_putt,sg_t2g,sg_total,year
83232,La Quinta CC,202,19195,2017-01-22,2,CareerBuilder Challenge,T34,"Rahm, Jon",round_1,71,,,,,,-0.846,2017
83233,Nicklaus Tournament Course,233,19195,2017-01-22,2,CareerBuilder Challenge,T34,"Rahm, Jon",round_2,66,,,,,,4.365,2017
83234,Stadium Course,704,19195,2017-01-22,2,CareerBuilder Challenge,T34,"Rahm, Jon",round_3,70,2.373,-2.344,0.54,2.025,0.57,2.596,2017
83235,Stadium Course,704,19195,2017-01-22,2,CareerBuilder Challenge,T34,"Rahm, Jon",round_4,72,-2.267,-0.325,0.761,1.13,-1.831,-0.701,2017


In [117]:
round_data.to_csv('data/historical_round_scores.csv', index=False)

In [None]:
import statsmodels.api as sm
X = grouped_df[['sg_putt_sum','sg_t2g_sum']]
y = grouped_df['fin_num']

In [None]:
m = sm.OLS.from_formula('fin_num ~ sg_putt_sum + sg_t2g_sum + sg_arg_std', data = grouped_df)

In [None]:
res = m.fit()

In [None]:
print(res.summary())

In [None]:
grouped_df.fin_num.value_counts()

In [None]:
# Historical Betting Odds
response = requests.get(f'https://feeds.datagolf.com/historical-odds/event-list?tour={tour}&key={MY_API_KEY}')
json_data = response.json()

In [None]:
book = 'bet365'
event_id = 536
market = "win"
response = requests.get(f'https://feeds.datagolf.com/historical-odds/outrights?tour={tour}&event_id={event_id}&year={year}&market={market}&book={book}&key={MY_API_KEY}')
json_data = response.json()

In [None]:
pd.DataFrame(json_data['odds']).sort_values('bet_outcome_numeric', ascending=False)

In [None]:
## Pre-tournament predictions archive
response = requests.get(f'https://feeds.datagolf.com/preds/pre-tournament?event_id={event_id}&year={year}&key=dac62093607b097430db71962680')
json_data = response.json()