In [30]:
import json
import os
from datetime import datetime
from functools import reduce

import pandas as pd
import requests

In [19]:
# API credentials and endpoint configuration.
# SECURITY: supply the key through the DATAGOLF_API_KEY environment variable.
# The literal below is only a backward-compatible fallback; it should be
# rotated and removed before this notebook is shared or committed.
MY_API_KEY = os.environ.get('DATAGOLF_API_KEY') or 'dac62093607b097430db71962680'
API_PATH = 'https://feeds.datagolf.com/'  # base URL; endpoint paths are appended to it
FILE_FORMAT = 'json'  # NOTE(review): not referenced by the code visible in this notebook

In [50]:
# Upper-case tour codes.
tour_lst = ['PGA', 'OPP', 'EUR', 'KFT']

In [332]:
# Columns to keep from each data set, keyed by data-set name.
# Every list contains 'dg_id'; 'player_name' is listed only under
# 'field_updates' (presumably so it is not duplicated when the frames are
# joined on dg_id -- confirm before re-adding it elsewhere).
config = {
    'field_updates': [
        'dg_id', 'player_name', 'country', 'event_name', 'current_round',
        'dk_id', 'dk_salary',
        'fd_id', 'fd_salary',
        'yh_id', 'yh_salary',
        'early_late', 'last_updated',
    ],
    'rankings': [
        'dg_id', 'primary_tour', 'datagolf_rank', 'owgr_rank', 'dg_skill_estimate',
    ],
    # 'dg_id' followed by '<market>_<model>' for every market under the
    # 'baseline' model, then every market under 'baseline_history_fit'.
    'pre_tourney_preds': ['dg_id'] + [
        f'{market}_{model}'
        for model in ('baseline', 'baseline_history_fit')
        for market in ('make_cut', 'top_10', 'top_20', 'top_5', 'win')
    ],
    # 'cf_approach_comp' and 'cf_short_comp' are deliberately not selected.
    'skill_decomps': [
        'dg_id',
        'baseline_pred',
        'timing_adjustment',
        'age_adjustment',
        'strokes_gained_category_adjustment',
        'course_experience_adjustment',
        'course_history_adjustment',
        'total_course_history_adjustment',
        'total_fit_adjustment',
        'driving_accuracy_adjustment',
        'driving_distance_adjustment',
        'other_fit_adjustment',
        'final_pred',
    ],
}

In [333]:
def get_field_updates(path, tour, key=MY_API_KEY, timeout=30):
    """Fetch the field for a tour and return it as a DataFrame.

    Parameters
    ----------
    path : str
        Endpoint path appended to ``API_PATH`` (e.g. ``'field-updates'``).
    tour : str
        Value sent as the ``tour`` query parameter.
    key : str, optional
        API key sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``'field'`` list, restricted to
        the columns in ``config['field_updates']``. Every other top-level
        key of the response is copied onto each row as a column first.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the payload lacks ``'field'`` or one of the configured columns.
    """
    # Let requests build and encode the query string, and never wait forever.
    response = requests.get(
        f'{API_PATH}{path}',
        params={'tour': tour, 'key': key},
        timeout=timeout,
    )
    # Surface HTTP errors here instead of as a confusing KeyError further down.
    response.raise_for_status()
    data = response.json()

    # Everything at the top level except 'field' is copied onto every row.
    event_meta = {name: value for name, value in data.items() if name != 'field'}
    df = pd.DataFrame(data['field']).assign(**event_meta)

    return df[config['field_updates']]

def get_rankings(path, key=MY_API_KEY, timeout=30):
    """Fetch the rankings table and return the configured columns.

    Parameters
    ----------
    path : str
        Endpoint path appended to ``API_PATH``.
    key : str, optional
        API key sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The response's ``'rankings'`` list as a frame, restricted to
        ``config['rankings']``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the payload lacks ``'rankings'`` or one of the configured columns.
    """
    # Passing params= avoids the malformed '?&key=' query string that manual
    # string building produced.
    response = requests.get(
        f'{API_PATH}{path}',
        params={'key': key},
        timeout=timeout,
    )
    response.raise_for_status()

    rankings = pd.DataFrame(response.json()['rankings'])
    return rankings[config['rankings']]

def get_pre_tourney_preds(path, tour, odds_format='percent', key=MY_API_KEY, timeout=30):
    """Fetch pre-tournament predictions and combine two models side by side.

    Each model listed under the response's ``'models_available'`` is loaded
    into its own frame, with the prediction columns (``make_cut``, ``top_10``,
    ``top_20``, ``top_5``, ``win``) suffixed by ``_<model>``. The ``baseline``
    frame is then left-joined with the ``*history*`` columns of the
    ``baseline_history_fit`` frame on ``dg_id``.

    Parameters
    ----------
    path : str
        Endpoint path appended to ``API_PATH``.
    tour : str
        Value sent as the ``tour`` query parameter.
    odds_format : str, optional
        Value sent as the ``odds_format`` query parameter.
    key : str, optional
        API key sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The joined frame restricted to ``config['pre_tourney_preds']``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If ``baseline`` or ``baseline_history_fit`` is not among the models
        returned, or a configured column is missing.
    """
    response = requests.get(
        f'{API_PATH}{path}',
        params={'tour': tour, 'odds_format': odds_format, 'key': key},
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()

    # Loop-invariant, so defined once rather than per model.
    pred_cols = ['make_cut', 'top_10', 'top_20', 'top_5', 'win']
    model_dict = {
        model: pd.DataFrame(data[model]).rename(
            columns={col: f'{col}_{model}' for col in pred_cols}
        )
        for model in data['models_available']
    }

    # Both models are needed below; report exactly which one is absent
    # instead of failing on a bare dictionary lookup.
    missing = [name for name in ('baseline', 'baseline_history_fit') if name not in model_dict]
    if missing:
        raise KeyError(f'prediction models missing from response: {missing}')

    history_fit = model_dict['baseline_history_fit']
    cols_to_use = [col for col in history_fit.columns if 'history' in col] + ['dg_id']

    df = model_dict['baseline'].merge(history_fit[cols_to_use], how='left', on='dg_id')

    return df[config['pre_tourney_preds']]

def get_player_skill_decomps(path, tour, key=MY_API_KEY, timeout=30):
    """Fetch per-player skill decompositions and return the configured columns.

    Parameters
    ----------
    path : str
        Endpoint path appended to ``API_PATH``.
    tour : str
        Value sent as the ``tour`` query parameter.
    key : str, optional
        API key sent as the ``key`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        The response's ``'players'`` list as a frame, restricted to
        ``config['skill_decomps']``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the payload lacks ``'players'`` or one of the configured columns.
    """
    # params= lets requests assemble the query string, so a path that already
    # ends in '?' no longer yields a doubled '??' in the URL.
    response = requests.get(
        f'{API_PATH}{path}',
        params={'tour': tour, 'key': key},
        timeout=timeout,
    )
    response.raise_for_status()

    players = pd.DataFrame(response.json()['players'])
    return players[config['skill_decomps']]

In [369]:
def merge_dfs(dfs, on='dg_id', how='left'):
    """Join a sequence of DataFrames pairwise, left to right.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames to combine. The first frame is the left side of the first
        join; each following frame is joined onto the running result.
    on : str or list of str, optional
        Key column(s) shared by every frame (default ``'dg_id'``).
    how : str, optional
        Join type passed to ``DataFrame.merge`` (default ``'left'``, so the
        first frame decides which keys are kept).

    Returns
    -------
    pandas.DataFrame
        The combined frame. A single-element input is returned unchanged.

    Raises
    ------
    TypeError
        If ``dfs`` is empty (raised by ``functools.reduce``).
    """
    def _join(left, right):
        return left.merge(right, on=on, how=how)

    return reduce(_join, dfs)

In [370]:
# Fetch the four PGA data sets that are joined afterwards. Endpoint paths are
# relative to API_PATH; the getters append the query string themselves, so a
# path must not end in its own '?' (that produced '...decompositions??tour=').
field_updates_df = get_field_updates('field-updates', 'pga')
rank_df = get_rankings('preds/get-dg-rankings')
pred_data = get_pre_tourney_preds('preds/pre-tournament', 'pga')
skill_df = get_player_skill_decomps('preds/player-decompositions', 'pga')

In [372]:
# field_updates_df comes first, so it is the left-most frame of the join chain.
merged_df = merge_dfs(
    [
        field_updates_df,
        rank_df,
        pred_data,
        skill_df,
    ]
)

In [383]:
# `now` was never defined in the notebook, so this cell raised NameError on a
# fresh kernel. Capture the current local time here to date-stamp the export.
now = datetime.now()
merged_df.to_csv(f'data_golf_{now.strftime("%Y-%m-%d")}.csv', index=False)