In [None]:
import pandas as pd
import numpy as np
import requests
import json
import os
from tqdm.notebook import tqdm
from functools import reduce
from datetime import datetime
import warnings 
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
%load_ext dotenv
%dotenv

from config import API_PATH, tour_lst, config

# API key is read from the environment; the value is None when the variable is not set
MY_API_KEY = os.environ.get('MY_API_KEY')

In [None]:
def unpack_json_from_api(end_point, key=None, timeout=30):
    """
    Helper function to turn json data from api into a DataFrame

    Parameters
    ----------
    end_point : str
        Fully-formed URL that is requested with an HTTP GET.
    key : str, optional
        When given, only ``json_data[key]`` is converted to a DataFrame;
        when omitted the whole decoded payload is converted.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    tuple
        ``(json_data, df)``: the decoded JSON payload and the DataFrame
        built from it (or from ``json_data[key]``).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    KeyError
        If ``key`` is given but missing from the payload.
    """
    # Without a timeout a stalled connection would block the notebook forever
    response = requests.get(end_point, timeout=timeout)
    # Fail here on HTTP errors instead of building a DataFrame from an error payload
    response.raise_for_status()
    json_data = response.json()
    payload = json_data if key is None else json_data[key]
    return json_data, pd.DataFrame(payload)

In [None]:
# def get_draft_group_id(contest_obj, contest_type_id=84):
#     draft_groups = contest_obj.draft_groups
#     draft_group_id_lst = []
#     for x in draft_groups:
#         if x.contest_type_id == contest_type_id:
#             draft_group_id_lst.append(x.draft_group_id)
            
#     return min(draft_group_id_lst)

# #draft_group_id = get_draft_group_id(contests)

# def get_draftables(draft_group_id):
#     df_lst = []
#     player_lst = Client().draftables(draft_group_id=draft_group_id).players
#     for player in tqdm(player_lst):
#         name_details = player.name_details
#         first = name_details.first
#         last = name_details.last
#         name = f'{last}, {first}'
#         salary = player.salary

#         df = pd.DataFrame({
#             'player_name': name,
#             'showdown_salary': salary
#         }, index=[0])

#         df_lst.append(df)

#     return pd.concat(df_lst).reset_index(drop=True)

# player_name_map = {
#     'Lee, Kyoung-Hoon':'Lee, Kyounghoon',
#     'Fitzpatrick, Matt':'Fitzpatrick, Matthew'
# }

# draftable_df['player_name'] = (np.where(draftable_df['player_name'].isin(player_name_map.keys()),
#                                         draftable_df['player_name'].map(player_name_map),
#                                         draftable_df['player_name'])
#                               )

# Data Golf
## Extract the current tournament field and relevant details for each golfer

In [None]:
class CurrentTourneyDataGolfExtractor:
    """
    Pulls the current-tournament feeds from the API and merges them into a
    single DataFrame joined on ``dg_id``.
    """
    def __init__(self,
                 tour,
                 config=config,
                 api_path=API_PATH,
                 api_key=MY_API_KEY,
                 odds_format='percent',
                ):
        """
        Parameters
        ----------
        tour : str
            Tour identifier sent as the ``tour`` query parameter.
        config : dict
            Mapping of feed name -> settings; every method reads the
            ``'path'`` and ``'fields'`` entries of the feed it is given.
        api_path : str
            Base URL that each feed's ``path`` is appended to.
        api_key : str
            Key sent as the ``key`` query parameter.
        odds_format : str
            Sent as ``odds_format`` by ``get_pre_tourney_preds``.
        """
        self.tour = tour
        self.config = config
        self.api_path = api_path
        self.api_key = api_key
        self.odds_format = odds_format

    def get_field_updates(self, config_key):
        """
        Returns the golfers that are in the field along with their respective daily fantasy salaries
        Data corresponds to: https://datagolf.com/field-updates

        Every top-level entry of the payload other than ``'field'`` is added
        as a column, then the frame is cut down to the configured fields.
        """
        path = self.config[config_key].get('path')
        end_point = f'{self.api_path}{path}?tour={self.tour}&key={self.api_key}'
        data, df = unpack_json_from_api(end_point, 'field')
        for col in data.keys():
            if col != 'field':
                df[col] = data[col]

        return df[self.config[config_key]['fields']]

    def get_rankings(self, config_key):
        """
        Returns the ``'rankings'`` entry of the rankings feed as a DataFrame,
        restricted to the configured fields. No ``tour`` parameter is sent.
        """
        path = self.config[config_key].get('path')
        end_point = f'{self.api_path}{path}?key={self.api_key}'
        df = unpack_json_from_api(end_point, 'rankings')[1]

        # Use the instance config so a config passed to __init__ is honoured
        return df[self.config[config_key]['fields']]

    def get_pre_tourney_preds(self, config_key):
        """
        Returns the pre-tournament predictions, one row per ``dg_id``.

        Each model listed under ``'models_available'`` has its prediction
        columns suffixed with the model name. When ``'baseline_history_fit'``
        is available, its columns containing ``'history'`` are left-joined
        onto the ``'baseline'`` frame; otherwise the baseline frame is used
        on its own. The result is restricted to the configured fields.

        Raises
        ------
        requests.HTTPError
            If the server answers with a 4xx/5xx status code.
        KeyError
            If the payload has no ``'baseline'`` model.
        """
        path = self.config[config_key].get('path')
        end_point = f'{self.api_path}{path}?tour={self.tour}&odds_format={self.odds_format}&key={self.api_key}'
        response = requests.get(end_point)
        response.raise_for_status()
        data = response.json()

        pred_cols = ['make_cut','top_10','top_20','top_5','win']
        model_dict = {}
        for model in data['models_available']:
            model_dict[model] = pd.DataFrame(data[model]).rename(
                columns={col: f'{col}_{model}' for col in pred_cols}
            )

        baseline = model_dict['baseline']
        # Test for the model that is actually merged rather than the model count,
        # so an unexpected second model cannot trigger a KeyError
        history_fit = model_dict.get('baseline_history_fit')
        if history_fit is None:
            df = baseline
        else:
            cols_to_use = [col for col in history_fit.columns if 'history' in col] + ['dg_id']
            df = baseline.merge(history_fit[cols_to_use], how='left', on='dg_id')

        return df[self.config[config_key]['fields']]

    def get_player_skill_decomps(self, config_key):
        """
        Returns the ``'players'`` entry of the skill-decomposition feed as a
        DataFrame, restricted to the configured fields.
        """
        path = self.config[config_key].get('path')
        end_point = f'{self.api_path}{path}?tour={self.tour}&key={self.api_key}'
        df = unpack_json_from_api(end_point, 'players')[1]

        return df[self.config[config_key]['fields']]

    def pull_and_merge_dfs(self):
        """
        Pulls the four feeds and left-joins them on ``dg_id``.

        The field-updates frame is the left-most table, so the result keeps
        every row of the field; columns from the rankings, predictions and
        skill decompositions are added where a matching ``dg_id`` exists.
        """
        field_updates = self.get_field_updates('field_updates')
        rankings = self.get_rankings('rankings')
        pre_tourney_preds = self.get_pre_tourney_preds('pre_tourney_preds')
        skill_decomps = self.get_player_skill_decomps('skill_decomps')

        dfs=[field_updates,
             rankings,
             pre_tourney_preds,
             skill_decomps
            ]

        merged_dfs = reduce(lambda left, right: pd.merge(left, right, on='dg_id', how='left'), dfs)

        return merged_dfs

In [None]:
#Pull and merge the data into a single DataFrame
dg_current_extractor = CurrentTourneyDataGolfExtractor('pga')
dg_current = dg_current_extractor.pull_and_merge_dfs()

#Write the data to a csv
#Using a new file name for each tournament
#The event name is taken from the first row by position (iloc) rather than by index label
event_name = '_'.join(dg_current['event_name'].iloc[0].split()).lower()
year = datetime.now().strftime('%Y')
fn = f'{year}_{event_name}_pre_tourney_snapshot'
#to_csv does not create missing folders, so make sure data/ exists first
os.makedirs('data', exist_ok=True)
dg_current.to_csv(f'data/{fn}.csv', index=False)

# Historical Data

In [15]:
class HistoricalDataGolfExtractor:
    """
    Pulls historical event ids, round-level data and odds for one tour
    from the API.
    """
    # Events before this year are skipped when requesting odds
    # (the feed doesn't have odds data prior to 2019)
    FIRST_YEAR_WITH_ODDS = 2019

    def __init__(self,
                 tour,
                 config=config,
                 api_path=API_PATH,
                 api_key=MY_API_KEY,
                 odds_format='percent',
                ):
        """
        Parameters
        ----------
        tour : str
            Tour identifier used to filter the event list and sent as the
            ``tour`` query parameter.
        config : dict
            Mapping of feed name -> settings; the methods read the
            ``'path'`` and ``'fields'`` entries of the feed they are given.
        api_path : str
            Base URL that each feed's ``path`` is appended to.
        api_key : str
            Key sent as the ``key`` query parameter.
        odds_format : str
            Stored on the instance.
        """
        self.tour = tour
        self.config = config
        self.api_path = api_path
        self.api_key = api_key
        self.odds_format = odds_format

    def get_historical_event_ids(self, config_key):
        """
        Returns a list of ``(calendar_year, event_id)`` tuples for the events
        whose ``tour`` column equals this extractor's tour.
        """
        path = self.config[config_key].get('path')
        end_point = f'{self.api_path}{path}?key={self.api_key}'
        df = (unpack_json_from_api(end_point)[1]
              .query(f"tour == '{self.tour}'")
              .reset_index(drop=True)
             )

        # Use the instance config so a config passed to __init__ is honoured
        df = df[self.config[config_key]['fields']]
        return list(zip(df.calendar_year, df.event_id))

    def get_historical_round_data(self, config_key, events):
        """
        Returns one row per player per round for every event in ``events``.

        Parameters
        ----------
        config_key : str
            Feed name whose ``'path'`` entry is requested.
        events : iterable
            ``(year, event_id)`` pairs, e.g. the output of
            ``get_historical_event_ids``.

        Returns
        -------
        pandas.DataFrame
            Player-level entries of each ``scores`` record next to the entries
            of one ``round_1`` .. ``round_4`` record, plus ``round``, ``year``,
            ``event_id``, ``event_completed`` and ``event_name``. Empty when
            no round data was found.
        """
        path = self.config[config_key].get('path')
        round_cols = [f'round_{round_n}' for round_n in range(1,5)]
        round_lst = []
        for event in tqdm(list(events)):
            year, event_id = event[0], event[1]
            # Build the URL from api_path like every other request of this class
            end_point = f'{self.api_path}{path}?tour={self.tour}&event_id={event_id}&year={year}&key={self.api_key}'
            df = unpack_json_from_api(end_point)[1]
            for player_scores in df['scores']:
                # Everything that is not a per-round record describes the player
                player_info = pd.DataFrame({k: v for k, v in player_scores.items() if k not in round_cols}, index=[0])

                for r in round_cols:
                    round_data = player_scores.get(r)
                    # Rounds absent from the record (or null) are skipped
                    if round_data is not None:
                        round_df = pd.DataFrame(round_data, index=[0])
                        round_df['round'] = r
                        round_df['year'] = year
                        round_df['event_id'] = event_id
                        round_df['event_completed'] = df['event_completed'].iloc[0]
                        round_df['event_name'] = df['event_name'].iloc[0]

                        round_lst.append(pd.concat([player_info, round_df], axis = 1))

        # pd.concat raises on an empty list; return an empty frame instead
        if not round_lst:
            return pd.DataFrame()

        return pd.concat(round_lst).reset_index(drop=True)

    def get_historical_odds(self, config_key, events, market, book):
        """
        Returns the historical odds of one market at one book for ``events``.

        Parameters
        ----------
        config_key : str
            Feed name whose ``'path'`` and ``'fields'`` entries are used.
        events : iterable
            ``(year, event_id)`` pairs; pairs with a year before
            ``FIRST_YEAR_WITH_ODDS`` are skipped.
        market : str
            Sent as the ``market`` query parameter.
        book : str
            Sent as the ``book`` query parameter.

        Returns
        -------
        pandas.DataFrame
            One row per entry of each response's ``odds`` list, tagged with
            ``year`` and ``event_id`` and restricted to the configured
            fields. Events whose request does not return status 200 are
            reported with ``print`` and skipped.
        """
        path = self.config[config_key].get('path')
        fields = self.config[config_key]['fields']
        # The accumulator has to exist before the loop appends to it
        odds_lst = []

        #Doesn't have odds data prior to 2019
        for event in tqdm([x for x in events if x[0] >= self.FIRST_YEAR_WITH_ODDS]):
            year, event_id = event[0], event[1]
            end_point = f'{self.api_path}{path}?tour={self.tour}&event_id={event_id}&year={year}&market={market}&book={book}&key={self.api_key}'
            response = requests.get(end_point)
            if response.status_code == 200:
                df = pd.DataFrame(response.json())
                for odds_record in df['odds']:
                    player_odds = pd.DataFrame(odds_record, index = [0])
                    player_odds['year'] = year
                    player_odds['event_id'] = event_id

                    odds_lst.append(player_odds)
            else:
                print(f"Bad response for {event}")

        # pd.concat raises on an empty list; return an empty frame with the expected columns
        if not odds_lst:
            return pd.DataFrame(columns=fields)

        hist_odd_df = pd.concat(odds_lst).reset_index(drop=True)
        return hist_odd_df[fields]
                        
        

In [None]:
#Pull historical data for the PGA
dg_hist_extractor = HistoricalDataGolfExtractor('pga')

#Event ids first: both the round data and the odds are requested per event
events = dg_hist_extractor.get_historical_event_ids(config_key='historical_event_lst')
round_data = dg_hist_extractor.get_historical_round_data(
    config_key='historical_round_data',
    events=events,
)
odds = dg_hist_extractor.get_historical_odds(
    config_key='historical_odds',
    events=events,
    market='win',
    book='bet365',
)

#Write data to csv
#round_data.to_csv('data/historical_round_scores.csv', index=False)

HBox(children=(FloatProgress(value=0.0, max=213.0), HTML(value='')))