# Import Packages

In [None]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime

# Scrape Upcoming Matches

Two separate calls are made to the Tennis Live Data API: the first retrieves the list of matches for a specified day, and the second retrieves the current player rankings. The two results are then combined into new rows to append to the bottom of `full_df` in the `ATP_data_cleaning` notebook.

In [87]:
def scrapeFutureMatches(date, *, api_key=None, timeout=30):  # date in format yyyy-mm-dd
    """Build one row per player for the matches returned for ``date``.

    Two Tennis Live Data (RapidAPI) endpoints are queried: ``matches-by-date``
    for the day's matches and ``rankings/ATP`` for ranking points. Matches and
    rankings are joined on a single name token (first token of the match
    player string, last token of the ranking ``full_name``).

    Args:
        date: Day to query as a ``yyyy-mm-dd`` string; appended to the URL.
        api_key: RapidAPI key. When omitted, the ``RAPIDAPI_KEY`` environment
            variable is used, falling back to the key that was previously
            embedded in this function.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        DataFrame with columns ``name``, ``rank_points``, ``opp_name``,
        ``opp_rank_points`` and ``tourney_date`` (first ten characters of the
        match date with dashes removed). Every match appears twice, once from
        each player's perspective. Ranking columns are NaN for players that
        have no match in the rankings (left joins).

    Raises:
        requests.HTTPError: If either API call returns an error status.
    """
    import os  # local import: only needed for the API-key lookup below

    # NOTE(security): this key is committed in source. It should be rotated and
    # supplied through RAPIDAPI_KEY (or ``api_key``); the literal is kept only
    # as a fallback so existing notebook runs keep working.
    if api_key is None:
        api_key = os.environ.get(
            "RAPIDAPI_KEY", "00229723ddmsh7d1fdfd7b967bd6p1e6ca6jsn0b3e25e2c461"
        )
    headers = {
        "X-RapidAPI-Key": api_key,
        "X-RapidAPI-Host": "tennis-live-data.p.rapidapi.com",
    }

    def fetch_results(url):
        # Fail on HTTP errors here rather than with a KeyError on 'results'.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()['results']

    # Matches API request
    base_url = "https://tennis-live-data.p.rapidapi.com/matches-by-date/"
    matches_df = pd.json_normalize(fetch_results(base_url + date))

    # flatten and format the JSON: one row per match, one column per match field
    matches_df = matches_df.explode('matches')
    matches_df = matches_df['matches'].apply(pd.Series)[['status', 'home_player', 'away_player', 'date']].copy()
    # keep only the first whitespace-separated token of each player string
    for side in ('home_player', 'away_player'):
        matches_df[side] = matches_df[side].apply(lambda x: str(x.split()[0]))

    # duplicate rows to make the dataframe tidy by player
    opp = matches_df.rename(columns={'home_player': 'away_player', 'away_player': 'home_player'})
    matches_df = pd.concat([matches_df, opp]).rename(columns={'home_player': 'player', 'away_player': 'opponent'})
    # The index label still identifies the originating element of `results`,
    # so this keeps only rows that came from the first element.
    # NOTE(review): confirm that restricting to the first entry is intended.
    matches_df = matches_df.loc[0].copy()

    # Rankings API request
    rankings = fetch_results("https://tennis-live-data.p.rapidapi.com/rankings/ATP")['rankings']

    # Flatten the JSON and rename columns
    rankings_df = pd.json_normalize(rankings)[['full_name', 'ranking_points']].copy()
    # join key: last whitespace-separated token of the full name
    rankings_df['player'] = rankings_df['full_name'].apply(lambda x: str(x.split()[-1]))
    rankings_df = rankings_df.rename(columns={'full_name': 'player_full_name', 'ranking_points': 'player_ranking_points'})

    # Joining the two dataframes
    # Merge to get player names and rank points
    matches_df['player'] = matches_df['player'].astype(str)
    rankings_df['player'] = rankings_df['player'].astype(str)
    info_df = matches_df.merge(rankings_df, on='player', how='left')

    # Merge to get opponent names and rank points
    opp_rankings_df = rankings_df.rename(columns={'player_full_name': 'opp_full_name', 'player_ranking_points': 'opp_ranking_points', 'player': 'opponent'})
    info_df = info_df.merge(opp_rankings_df, on='opponent', how='left')

    # Clean and save resulting dataframe
    matches = info_df.drop(columns=['player', 'opponent'])
    matches = matches.rename(columns={'player_full_name': 'name', 'opp_full_name': 'opp_name', 'player_ranking_points': 'rank_points', 'opp_ranking_points': 'opp_rank_points'})
    matches['tourney_date'] = matches['date'].apply(lambda x: x[:10].replace('-', ''))
    future_matches = matches.drop(['status', 'date'], axis=1)

    return future_matches

In [95]:
# Fetch the per-player match rows for one day (yyyy-mm-dd); change the input date to today
upcoming_matches_df = scrapeFutureMatches(date="2022-07-10")
upcoming_matches_df.head(5)

Unnamed: 0,name,rank_points,opp_name,opp_rank_points,tourney_date
0,Novak Djokovic,4770,Nick Kyrgios,710,20220710
1,Nick Kyrgios,710,Novak Djokovic,4770,20220710


# Scrape PrizePicks Lines

Use the helper function from the [PrizePicks API documentation](https://github.com/PrizePicks-Analytics/PrizePicks-API) to retrieve the projection data.

In [93]:
# Use helper function
def call_endpoint(url, max_level=3, include_new_player_attributes=False, timeout=30):
    '''
    Call an API endpoint and return the ``data`` records of its JSON response.

    takes: 
        - url (str): the API endpoint to call
        - max_level (int): level of json normalizing to apply
        - include_new_player_attributes (bool): whether to include player object attributes in the returned dataframe
        - timeout (float): seconds to wait for the HTTP response
    returns:
        - df (pd.DataFrame): a dataframe of the call response content
    raises:
        - requests.HTTPError: if the endpoint responds with an error status
    '''
    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures directly instead of failing later while parsing.
    response.raise_for_status()
    resp = response.json()
    data = pd.json_normalize(resp['data'], max_level=max_level)
    if not include_new_player_attributes:
        return data

    # Only touch resp['included'] when the player attributes are requested.
    included = pd.json_normalize(resp['included'], max_level=max_level)
    # Keep the 'new_player' records and drop every column that has a missing value.
    new_players = included[included['type'] == 'new_player'].dropna(axis=1)
    return pd.merge(data,
                    new_players,
                    how='left',
                    left_on=['relationships.new_player.data.id', 'relationships.new_player.data.type'],
                    right_on=['id', 'type'],
                    suffixes=('', '_new_player'))

# Create dataframe using the helper function
url = 'https://partner-api.prizepicks.com/projections?per_page=100'
df = call_endpoint(url, include_new_player_attributes=True)
lines = df[['attributes.league', 'attributes.team', 'attributes.name', 'attributes.stat_type', 'attributes.line_score', 'attributes.start_time']]
lines = lines.rename(columns={
    "attributes.line_score": "prizepicks_line",
    "attributes.start_time": "Start Time",
    "attributes.stat_type": "Stat Type",
    "attributes.name": "name",
    "attributes.team": "Team Name",
    "attributes.league": "League"
})

# Format start time
# Parse each ISO timestamp once and assign whole columns. Element-wise chained
# assignment (`lines[col][i] = ...`) raises SettingWithCopyWarning, has no effect
# under pandas Copy-on-Write, and depends on the index being 0..n-1.
start_times = [datetime.fromisoformat(ts) for ts in lines['Start Time']]
lines['Start Date'] = [ts.date() for ts in start_times]
lines['Start Time'] = [ts.hour for ts in start_times]
lines['tourney_date'] = lines['Start Date'].apply(lambda x: x.strftime("%Y%m%d"))


# Filter to only include tennis, only fantasy points
lines = lines[(lines['League'] == 'TENNIS') & (lines['Stat Type'] == 'Fantasy Score')]

# Drop unnecessary cols
prizepicks_df = lines.drop(columns=['League', 'Team Name', 'Stat Type', 'Start Time', 'Start Date'])


# Show dataframe and save to CSV
prizepicks_df.head()
# lines.to_csv(r'June 28 Tennis.csv')

Unnamed: 0,name,prizepicks_line,tourney_date
3,Paula Badosa,14.5,20220806
7,Andrey Rublev,24.0,20220806
8,Daniil Medvedev,23.0,20220806


# Combine Dataframes

`prizepicks_line` does not exist as a column in the historical ATP dataset, so it is used only to assess whether our predictions outperform the PrizePicks line.

In [None]:
# Keep only players that have both a PrizePicks line and a scheduled match on the same date
upcoming_df = pd.merge(
    prizepicks_df,
    upcoming_matches_df,
    on=['name', 'tourney_date'],
    how="inner",
)
upcoming_df.head()

In [None]:
# Save to CSV
# `date` was never defined at notebook scope (it only exists as the parameter of
# scrapeFutureMatches), so the file-name date is derived from the data itself:
# the first `tourney_date` (yyyymmdd) reformatted as yyyy-mm-dd, or today's date
# when there are no rows.
if upcoming_df.empty:
    date = datetime.today().strftime("%Y-%m-%d")
else:
    date = datetime.strptime(str(upcoming_df['tourney_date'].iloc[0]), "%Y%m%d").strftime("%Y-%m-%d")
file_name = "future_matches_" + date + ".csv"
upcoming_df.to_csv(file_name)