In [3]:
# load packages


import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 500)
import requests
from datetime import datetime # used to change from ISO 8601 to a more readable format

In [4]:
# scrape lines from prizepicks


# use helper function from the API documentation (link below) to return data
# https://github.com/PrizePicks-Analytics/PrizePicks-API
def call_endpoint(url, max_level=3, include_new_player_attributes=False, timeout=30):
    '''
    Call a PrizePicks API endpoint and return the response as a flat dataframe.

    takes:
        - url (str): the API endpoint to call
        - max_level (int): level of json normalizing to apply
        - include_new_player_attributes (bool): whether to include new_player object
          attributes in the returned dataframe
        - timeout (float): seconds to wait for the server before giving up
    returns:
        - df (pd.DataFrame): a dataframe of the call response content
    raises:
        - requests.HTTPError: if the server answers with an error status code
        - KeyError: if the response body has no 'data' entry
    '''
    # fail fast on a hung connection or an HTTP error instead of trying to
    # parse an error page as if it were a projections payload
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    resp = response.json()

    data = pd.json_normalize(resp['data'], max_level=max_level)
    if not include_new_player_attributes:
        # 'included' is only needed for the player merge, so do not require it here
        return data

    included = pd.json_normalize(resp.get('included') or [], max_level=max_level)
    left_keys = ['relationships.new_player.data.id'
                 , 'relationships.new_player.data.type']
    # nothing to merge when the response carries no included objects or the
    # projections do not reference a new_player; return the projections as-is
    if 'type' not in included.columns or not set(left_keys).issubset(data.columns):
        return data

    # keep only player objects and drop columns that are missing for any of them
    inc_cop = included[included['type'] == 'new_player'].copy().dropna(axis=1)
    return pd.merge(data
                    , inc_cop
                    , how='left'
                    , left_on=left_keys
                    , right_on=['id', 'type']
                    , suffixes=('', '_new_player'))

# create dataframe using the helper function
url = 'https://partner-api.prizepicks.com/projections?per_page=100'
df = call_endpoint(url, include_new_player_attributes=True)

# keep only the columns of interest and give them readable names
# (dict order defines the column order of the resulting dataframe)
column_names = {
    'attributes.league': "League",
    'attributes.team': "Team Name",
    'attributes.name': "Player Name",
    'attributes.stat_type': "Stat Type",
    'attributes.line_score': "Projected Score",
    'attributes.start_time': "Start Time",
}
lines = df[list(column_names)].rename(columns=column_names)

# filter to only include tennis
# done before parsing so rows from other leagues are never parsed;
# .copy() makes the column assignments below act on an independent frame
lines = lines[lines['League'] == 'TENNIS'].copy()

# formatting start time: split the ISO 8601 timestamp into a date and an hour
# whole-column assignment replaces the former element-by-element chained
# assignment (lines[col][i] = ...), which pandas does not guarantee to write back
parsed_starts = [datetime.fromisoformat(stamp) for stamp in lines['Start Time']]
lines['Start Date'] = [moment.date() for moment in parsed_starts]
lines['Start Time'] = [moment.hour for moment in parsed_starts]

# show dataframe and save to CSV
lines.head()
# lines.to_csv(r'June 28 Tennis.csv')

Unnamed: 0,League,Team Name,Player Name,Stat Type,Projected Score,Start Time,Start Date
0,TENNIS,Frances Tiafoe,Frances Tiafoe,Fantasy Score,16.5,6,2022-07-01
1,TENNIS,Tim Van Rijthoven,Tim Van Rijthoven,Fantasy Score,25.0,6,2022-07-01
6,TENNIS,Tommy Paul,Tommy Paul,Fantasy Score,21.5,8,2022-07-01
7,TENNIS,David Goffin,David Goffin,Fantasy Score,16.5,8,2022-07-01
9,TENNIS,Novak Djokovic,Novak Djokovic,Fantasy Score,32.5,9,2022-07-01
